From 3a076beb1340f304c1582b94b1fb94a797700dcd Mon Sep 17 00:00:00 2001
From: Job Noorman
Date: Sun, 12 Apr 2026 17:49:59 +0200
Subject: [PATCH] ir3/analyze_ubo_ranges: don't over-align consts when loaded via preamble

Consts don't need to be const_upload_unit aligned when initialized in the preamble.

Totals from 121136 (68.73% of 176258) affected shaders:
MaxWaves: 1731152 -> 1731238 (+0.00%); split: +0.01%, -0.01%
Instrs: 41003924 -> 41006922 (+0.01%); split: -0.04%, +0.04%
CodeSize: 83451224 -> 84153136 (+0.84%); split: -0.15%, +0.99%
NOPs: 6920243 -> 6923031 (+0.04%); split: -0.20%, +0.24%
MOVs: 1202942 -> 1203260 (+0.03%); split: -0.13%, +0.16%
COVs: 654863 -> 654827 (-0.01%); split: -0.01%, +0.00%
Full: 1356271 -> 1356003 (-0.02%); split: -0.03%, +0.01%
(ss): 1019993 -> 1019657 (-0.03%); split: -0.16%, +0.12%
(sy): 489430 -> 489607 (+0.04%); split: -0.07%, +0.10%
(ss)-stall: 3878805 -> 3875997 (-0.07%); split: -0.21%, +0.13%
(sy)-stall: 14655425 -> 14660516 (+0.03%); split: -0.08%, +0.11%
STPs: 9680 -> 9653 (-0.28%)
LDPs: 17026 -> 16999 (-0.16%)
Preamble Instrs: 8805343 -> 9195550 (+4.43%); split: -1.65%, +6.08%
Early Preamble: 101798 -> 103143 (+1.32%); split: +1.44%, -0.12%
Constlen: 5761784 -> 4356540 (-24.39%); split: -24.40%, +0.02%
Subgroup size: 832 -> 1664 (+100.00%)
Cat0: 7631222 -> 7634040 (+0.04%); split: -0.18%, +0.22%
Cat1: 1897357 -> 1897579 (+0.01%); split: -0.09%, +0.10%
Cat2: 15537632 -> 15537426 (-0.00%); split: -0.00%, +0.00%
Cat6: 424903 -> 424996 (+0.02%); split: -0.00%, +0.02%
Cat7: 1002957 -> 1003028 (+0.01%); split: -0.07%, +0.08%

Signed-off-by: Job Noorman
Part-of:
---
 src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 63e487f10c8..622f1d6f729 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@
-545,6 +545,8 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v) if (ir3_shader_debug & IR3_DBG_NOUBOOPT) return false; + uint32_t align_vec4 = const_align_vec4(compiler); + unsigned max_upload; uint32_t global_offset = 0; if (v->binning_pass) { @@ -569,7 +571,7 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v) if (instr_is_load_const(instr) && ir3_def_is_rematerializable_for_preamble(nir_instr_as_intrinsic(instr)->src[0].ssa, NULL)) gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), &state, - compiler->const_upload_unit, + align_vec4, &upload_remaining); } } @@ -595,7 +597,7 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v) continue; progress |= lower_ubo_load_to_uniform( nir_instr_as_intrinsic(instr), &builder, &state, NULL, - compiler->const_upload_unit); + align_vec4); } } @@ -651,7 +653,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) nir_foreach_instr (instr, block) { if (instr_is_load_ubo(instr)) gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), state, - compiler->const_upload_unit, + align_vec4, &upload_remaining); } } @@ -678,6 +680,8 @@ ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v) const struct ir3_const_state *const_state = ir3_const_state(v); const struct ir3_ubo_analysis_state *state = &const_state->ubo_state; + uint32_t align_vec4 = const_align_vec4(compiler); + int num_ubos = 0; bool progress = false; bool has_preamble = false; @@ -696,7 +700,7 @@ ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v) continue; progress |= lower_ubo_load_to_uniform( nir_instr_as_intrinsic(instr), &builder, state, &num_ubos, - compiler->const_upload_unit); + align_vec4); } }