From 35ec960f6f3f6ef61615af02eae77f1ec2b177eb Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Sun, 6 Apr 2025 16:06:11 +0200 Subject: [PATCH] ir3: run cp after ir3_imm_const_to_preamble Now that ir3_cp has an option to not lower immediates to const registers, we can use it after ir3_imm_const_to_preamble instead of manually propagating immediates. This fixes a lot of missed opportunities for early-preamble as we didn't propagate the mova1 immediate which caused a GPR to be used in many preambles. Totals: Instrs: 49704517 -> 49703700 (-0.00%); split: -0.16%, +0.16% CodeSize: 103917968 -> 103187072 (-0.70%); split: -0.82%, +0.11% NOPs: 8516944 -> 8511764 (-0.06%); split: -0.78%, +0.72% MOVs: 1534023 -> 1536385 (+0.15%); split: -1.12%, +1.27% Full: 1816517 -> 1816548 (+0.00%); split: -0.05%, +0.06% (ss): 1162108 -> 1161490 (-0.05%); split: -1.03%, +0.98% (sy): 611398 -> 610311 (-0.18%); split: -0.80%, +0.62% (ss)-stall: 4384529 -> 4388096 (+0.08%); split: -1.22%, +1.30% (sy)-stall: 17858701 -> 17837101 (-0.12%); split: -0.87%, +0.74% STPs: 25096 -> 25491 (+1.57%); split: -0.05%, +1.63% LDPs: 37635 -> 38030 (+1.05%); split: -0.03%, +1.08% Preamble Instrs: 12589113 -> 11391946 (-9.51%); split: -9.75%, +0.24% Early Preamble: 115946 -> 122893 (+5.99%); split: +6.05%, -0.06% Cat0: 9374513 -> 9370393 (-0.04%); split: -0.71%, +0.67% Cat1: 2443348 -> 2446546 (+0.13%); split: -0.82%, +0.95% Cat2: 18731502 -> 18731478 (-0.00%); split: -0.00%, +0.00% Cat7: 1410092 -> 1410221 (+0.01%); split: -0.61%, +0.62% Totals from 39189 (23.81% of 164575) affected shaders: Instrs: 30656115 -> 30655298 (-0.00%); split: -0.26%, +0.26% CodeSize: 61714230 -> 60983334 (-1.18%); split: -1.37%, +0.19% NOPs: 6074700 -> 6069520 (-0.09%); split: -1.10%, +1.01% MOVs: 1010392 -> 1012754 (+0.23%); split: -1.70%, +1.93% Full: 617108 -> 617139 (+0.01%); split: -0.16%, +0.16% (ss): 778842 -> 778224 (-0.08%); split: -1.54%, +1.46% (sy): 362803 -> 361716 (-0.30%); split: -1.35%, +1.05% (ss)-stall: 3203827 
-> 3207394 (+0.11%); split: -1.67%, +1.78% (sy)-stall: 9507680 -> 9486080 (-0.23%); split: -1.63%, +1.40% STPs: 23004 -> 23399 (+1.72%); split: -0.06%, +1.77% LDPs: 33942 -> 34337 (+1.16%); split: -0.04%, +1.20% Preamble Instrs: 8090918 -> 6893751 (-14.80%); split: -15.18%, +0.38% Early Preamble: 12246 -> 19193 (+56.73%); split: +57.25%, -0.52% Cat0: 6656706 -> 6652586 (-0.06%); split: -1.00%, +0.94% Cat1: 1546399 -> 1549597 (+0.21%); split: -1.30%, +1.50% Cat2: 11642214 -> 11642190 (-0.00%); split: -0.00%, +0.00% Cat7: 943911 -> 944040 (+0.01%); split: -0.91%, +0.92% Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3_compiler_nir.c | 5 +++++ src/freedreno/ir3/ir3_preamble.c | 14 +------------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 0746756d319..692f4f310d9 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -5818,6 +5818,11 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (IR3_PASS(ir, ir3_imm_const_to_preamble, so)) { progress = true; + /* Propagate immediates created by ir3_imm_const_to_preamble but make sure + * we don't lower any more immediates to const registers. + */ + IR3_PASS(ir, ir3_cp, so, false); + /* ir3_imm_const_to_preamble might create duplicate a1.x movs. 
*/ IR3_PASS(ir, ir3_cse); } diff --git a/src/freedreno/ir3/ir3_preamble.c b/src/freedreno/ir3/ir3_preamble.c index 33cc2fae447..bbfa7fb01b5 100644 --- a/src/freedreno/ir3/ir3_preamble.c +++ b/src/freedreno/ir3/ir3_preamble.c @@ -44,19 +44,7 @@ ir3_imm_const_to_preamble(struct ir3 *ir, struct ir3_shader_variant *so) struct ir3_instruction *src = ir3_create_collect(&build, movs, components); unsigned dst = ir3_const_imm_index_to_reg(consts, i); - struct ir3_instruction *stc = ir3_store_const(so, &build, src, dst); - - /* We cannot run ir3_cp anymore as that would potentially lower more - * immediates to const registers because we reset count to 0 below (which - * is necessary to stop the driver from uploading the immediates). So we - * have to manually propagate the stc immediate. - */ - struct ir3_instruction *mov_imm = stc->srcs[0]->def->instr; - assert(mov_imm->opc == OPC_MOV); - assert(mov_imm->srcs[0]->flags & IR3_REG_IMMED); - - stc->srcs[0] = mov_imm->srcs[0]; - list_del(&mov_imm->node); + ir3_store_const(so, &build, src, dst); } imms->count = 0;