From f46e2baeb37d6cdac82ef8287c470b885602a5ce Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Fri, 8 Aug 2025 10:13:42 +0200 Subject: [PATCH] ir3/spill: initialize base reg as late as possible We currently insert the base reg at the very start of the shader. This prevents enabling early preamble even if nothing is spilled in the preamble. Prevent this by keeping track of the least common ancestor of all blocks that spill/reload and moving the base reg there. Totals: Instrs: 48207402 -> 48210556 (+0.01%); split: -0.00%, +0.01% CodeSize: 101907026 -> 101909942 (+0.00%); split: -0.00%, +0.00% NOPs: 8386320 -> 8387956 (+0.02%); split: -0.01%, +0.03% MOVs: 1468853 -> 1469173 (+0.02%); split: -0.02%, +0.04% COVs: 823724 -> 823852 (+0.02%); split: -0.00%, +0.02% (ss): 1113167 -> 1113157 (-0.00%); split: -0.01%, +0.01% (sy): 552317 -> 552306 (-0.00%); split: -0.01%, +0.00% (ss)-stall: 4013046 -> 4013109 (+0.00%); split: -0.00%, +0.00% (sy)-stall: 16741190 -> 16740000 (-0.01%); split: -0.02%, +0.01% Preamble Instrs: 11506988 -> 11506257 (-0.01%); split: -0.01%, +0.00% Early Preamble: 121339 -> 121367 (+0.02%) Last helper: 11686328 -> 11686316 (-0.00%); split: -0.00%, +0.00% Cat0: 9241457 -> 9243099 (+0.02%); split: -0.01%, +0.03% Cat1: 2353411 -> 2354995 (+0.07%); split: -0.04%, +0.11% Cat2: 17468471 -> 17468507 (+0.00%); split: -0.00%, +0.00% Cat7: 1637795 -> 1637687 (-0.01%); split: -0.01%, +0.00% Totals from 48 (0.03% of 164705) affected shaders: Instrs: 347473 -> 350627 (+0.91%); split: -0.40%, +1.31% CodeSize: 565490 -> 568406 (+0.52%); split: -0.23%, +0.74% NOPs: 70496 -> 72132 (+2.32%); split: -1.07%, +3.39% MOVs: 27524 -> 27844 (+1.16%); split: -1.23%, +2.39% COVs: 6275 -> 6403 (+2.04%); split: -0.38%, +2.42% (ss): 8850 -> 8840 (-0.11%); split: -0.76%, +0.64% (sy): 4666 -> 4655 (-0.24%); split: -0.69%, +0.45% (ss)-stall: 12116 -> 12179 (+0.52%); split: -0.65%, +1.17% (sy)-stall: 266208 -> 265018 (-0.45%); split: -1.08%, +0.63% Preamble Instrs: 20657 -> 19926 
(-3.54%); split: -3.56%, +0.02% Early Preamble: 0 -> 28 (+inf%) Last helper: 25507 -> 25495 (-0.05%); split: -0.12%, +0.07% Cat0: 76458 -> 78100 (+2.15%); split: -0.99%, +3.14% Cat1: 82669 -> 84253 (+1.92%); split: -1.11%, +3.03% Cat2: 89414 -> 89450 (+0.04%); split: -0.09%, +0.13% Cat7: 8595 -> 8487 (-1.26%); split: -1.33%, +0.07% Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3.h | 1 + src/freedreno/ir3/ir3_dominance.c | 16 ++++++++++++++++ src/freedreno/ir3/ir3_spill.c | 16 ++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index f74327ce51b..02819b6d153 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -856,6 +856,7 @@ unsigned ir3_block_get_pred_index(struct ir3_block *block, void ir3_calc_dominance(struct ir3 *ir); bool ir3_block_dominates(struct ir3_block *a, struct ir3_block *b); +struct ir3_block *ir3_dominance_lca(struct ir3_block *b1, struct ir3_block *b2); struct ir3_shader_variant; diff --git a/src/freedreno/ir3/ir3_dominance.c b/src/freedreno/ir3/ir3_dominance.c index 7d04145de31..fc94a648cd6 100644 --- a/src/freedreno/ir3/ir3_dominance.c +++ b/src/freedreno/ir3/ir3_dominance.c @@ -105,3 +105,19 @@ ir3_block_dominates(struct ir3_block *a, struct ir3_block *b) return a->dom_pre_index <= b->dom_pre_index && a->dom_post_index >= b->dom_post_index; } + +/** + * Computes the least common ancestor of two blocks using intersect(). If one of + * the blocks is null, the other one is returned (null if both are null). 
+ */ +struct ir3_block * +ir3_dominance_lca(struct ir3_block *b1, struct ir3_block *b2) +{ + if (b1 == NULL) + return b2; + + if (b2 == NULL) + return b1; + + return intersect(b1, b2); +} diff --git a/src/freedreno/ir3/ir3_spill.c b/src/freedreno/ir3/ir3_spill.c index 857ff971072..c665eb18abc 100644 --- a/src/freedreno/ir3/ir3_spill.c +++ b/src/freedreno/ir3/ir3_spill.c @@ -109,6 +109,13 @@ struct ra_spill_ctx { */ struct ir3_register *base_reg; + /* During spilling/reloading, we keep track of the least common ancestor of + * all spill/reload blocks and move base_reg there. This prevents using a GPR + * in the preamble, and hence disabling early preamble, if nothing is spilled + * there. + */ + struct ir3_block *base_reg_block; + /* Current pvtmem offset in bytes. */ unsigned spill_slot; @@ -757,6 +764,8 @@ spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val, } else { src->wrmask = reg->wrmask; } + + ctx->base_reg_block = ir3_dominance_lca(ctx->base_reg_block, spill->block); } static void @@ -927,6 +936,7 @@ reload(struct ra_spill_ctx *ctx, struct ir3_register *reg, dst->merge_set_offset = reg->merge_set_offset; dst->interval_start = reg->interval_start; dst->interval_end = reg->interval_end; + ctx->base_reg_block = ir3_dominance_lca(ctx->base_reg_block, reload->block); return dst; } @@ -2164,6 +2174,12 @@ ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v, ir3_create_parallel_copies(ir); + if (ctx->base_reg_block && + ctx->base_reg_block != ctx->base_reg->instr->block) { + ir3_instr_move_after_phis(ctx->base_reg->instr, ctx->base_reg_block); + ctx->base_reg->instr->block = ctx->base_reg_block; + } + /* After this point, we're done mutating the IR. Liveness has been trashed, * so recalculate it. We'll need it for recalculating the merge sets. */