From 6d779e900cb1394dfd9e4161357cc2b05d2c012a Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Fri, 8 Aug 2025 13:12:55 +0200 Subject: [PATCH] ir3: allow shared srcs for ldc This works just fine and opens up a lot more opportunities for early preamble. Note that I haven't seen actual cases where the index is large enough to need a register but verified in computerator that it works. Totals: MaxWaves: 2377396 -> 2377428 (+0.00%); split: +0.00%, -0.00% Instrs: 48194946 -> 48173991 (-0.04%); split: -0.10%, +0.06% CodeSize: 101878832 -> 101892290 (+0.01%); split: -0.06%, +0.07% NOPs: 8383282 -> 8362061 (-0.25%); split: -0.46%, +0.20% MOVs: 1468620 -> 1464570 (-0.28%); split: -1.14%, +0.86% COVs: 823514 -> 823506 (-0.00%); split: -0.00%, +0.00% Full: 1716474 -> 1714338 (-0.12%); split: -0.13%, +0.01% (ss): 1112861 -> 1117802 (+0.44%); split: -0.52%, +0.96% (sy): 552143 -> 553148 (+0.18%); split: -0.31%, +0.49% (ss)-stall: 4011510 -> 4018364 (+0.17%); split: -0.26%, +0.43% (sy)-stall: 16736200 -> 16766871 (+0.18%); split: -0.39%, +0.57% STPs: 18895 -> 18887 (-0.04%) LDPs: 23853 -> 23845 (-0.03%) Preamble Instrs: 11502184 -> 11461058 (-0.36%); split: -0.48%, +0.12% Early Preamble: 121333 -> 125474 (+3.41%) Last helper: 11683394 -> 11675574 (-0.07%); split: -0.14%, +0.07% Subgroup size: 12951168 -> 12952320 (+0.01%) Cat0: 9238171 -> 9217761 (-0.22%); split: -0.42%, +0.20% Cat1: 2352968 -> 2348920 (-0.17%); split: -0.71%, +0.54% Cat2: 17464877 -> 17468941 (+0.02%); split: -0.00%, +0.03% Cat6: 515664 -> 515648 (-0.00%) Cat7: 1636736 -> 1636191 (-0.03%); split: -0.33%, +0.29% Totals from 12861 (7.81% of 164705) affected shaders: MaxWaves: 141814 -> 141846 (+0.02%); split: +0.07%, -0.05% Instrs: 12731084 -> 12710129 (-0.16%); split: -0.39%, +0.22% CodeSize: 24749138 -> 24762596 (+0.05%); split: -0.23%, +0.29% NOPs: 2744093 -> 2722872 (-0.77%); split: -1.40%, +0.62% MOVs: 492373 -> 488323 (-0.82%); split: -3.39%, +2.57% COVs: 170074 -> 170066 (-0.00%); split: -0.01%, +0.00% Full: 224044 -> 221908 (-0.95%); split: -1.00%, +0.05% (ss): 325836 -> 330777 (+1.52%); split: -1.76%, +3.28% (sy): 143661 -> 144666 (+0.70%); split: -1.20%, +1.89% (ss)-stall: 1397335 -> 1404189 (+0.49%); split: -0.75%, +1.24% (sy)-stall: 4286193 -> 4316864 (+0.72%); split: -1.52%, +2.24% STPs: 1628 -> 1620 (-0.49%) LDPs: 2183 -> 2175 (-0.37%) Preamble Instrs: 2486870 -> 2445744 (-1.65%); split: -2.23%, +0.57% Early Preamble: 170 -> 4311 (+2435.88%) Last helper: 3053311 -> 3045491 (-0.26%); split: -0.52%, +0.26% Subgroup size: 991296 -> 992448 (+0.12%) Cat0: 3031604 -> 3011194 (-0.67%); split: -1.29%, +0.61% Cat1: 667377 -> 663329 (-0.61%); split: -2.50%, +1.90% Cat2: 4485219 -> 4489283 (+0.09%); split: -0.01%, +0.10% Cat6: 87365 -> 87349 (-0.02%) Cat7: 731126 -> 730581 (-0.07%); split: -0.73%, +0.65% Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3_compiler_nir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 18d97ecea17..1a95812002f 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1177,8 +1177,11 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr, assert(nir_intrinsic_base(intr) == 0); unsigned ncomp = intr->num_components; - struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[1])[0]; - struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0]; + bool use_shared = !intr->def.divergent && ctx->compiler->has_scalar_alu; + struct ir3_instruction *offset = + ir3_get_src_shared(ctx, &intr->src[1], use_shared)[0]; + struct ir3_instruction *idx = + ir3_get_src_shared(ctx, &intr->src[0], use_shared)[0]; struct ir3_instruction *ldc = ir3_LDC(b, idx, 0, offset, 0); ldc->dsts[0]->wrmask = MASK(ncomp); ldc->cat6.iim_val = ncomp; @@ -1190,8 +1193,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr, ctx->so->bindless_ubo = true; ir3_handle_nonuniform(ldc, intr); - if (!intr->def.divergent && - ctx->compiler->has_scalar_alu) { + if (use_shared) { ldc->dsts[0]->flags |= IR3_REG_SHARED; ldc->flags |= IR3_INSTR_U; }