ir3/spill: initialize base reg as late as possible

We currently insert the base reg at the very start of the shader. This
prevents enabling early preamble even if nothing is spilled in the
preamble.

Prevent this by keeping track of the least common ancestor of all blocks
that spill/reload and moving the base reg there.

Totals:
Instrs: 48207402 -> 48210556 (+0.01%); split: -0.00%, +0.01%
CodeSize: 101907026 -> 101909942 (+0.00%); split: -0.00%, +0.00%
NOPs: 8386320 -> 8387956 (+0.02%); split: -0.01%, +0.03%
MOVs: 1468853 -> 1469173 (+0.02%); split: -0.02%, +0.04%
COVs: 823724 -> 823852 (+0.02%); split: -0.00%, +0.02%
(ss): 1113167 -> 1113157 (-0.00%); split: -0.01%, +0.01%
(sy): 552317 -> 552306 (-0.00%); split: -0.01%, +0.00%
(ss)-stall: 4013046 -> 4013109 (+0.00%); split: -0.00%, +0.00%
(sy)-stall: 16741190 -> 16740000 (-0.01%); split: -0.02%, +0.01%
Preamble Instrs: 11506988 -> 11506257 (-0.01%); split: -0.01%, +0.00%
Early Preamble: 121339 -> 121367 (+0.02%)
Last helper: 11686328 -> 11686316 (-0.00%); split: -0.00%, +0.00%
Cat0: 9241457 -> 9243099 (+0.02%); split: -0.01%, +0.03%
Cat1: 2353411 -> 2354995 (+0.07%); split: -0.04%, +0.11%
Cat2: 17468471 -> 17468507 (+0.00%); split: -0.00%, +0.00%
Cat7: 1637795 -> 1637687 (-0.01%); split: -0.01%, +0.00%

Totals from 48 (0.03% of 164705) affected shaders:
Instrs: 347473 -> 350627 (+0.91%); split: -0.40%, +1.31%
CodeSize: 565490 -> 568406 (+0.52%); split: -0.23%, +0.74%
NOPs: 70496 -> 72132 (+2.32%); split: -1.07%, +3.39%
MOVs: 27524 -> 27844 (+1.16%); split: -1.23%, +2.39%
COVs: 6275 -> 6403 (+2.04%); split: -0.38%, +2.42%
(ss): 8850 -> 8840 (-0.11%); split: -0.76%, +0.64%
(sy): 4666 -> 4655 (-0.24%); split: -0.69%, +0.45%
(ss)-stall: 12116 -> 12179 (+0.52%); split: -0.65%, +1.17%
(sy)-stall: 266208 -> 265018 (-0.45%); split: -1.08%, +0.63%
Preamble Instrs: 20657 -> 19926 (-3.54%); split: -3.56%, +0.02%
Early Preamble: 0 -> 28 (+inf%)
Last helper: 25507 -> 25495 (-0.05%); split: -0.12%, +0.07%
Cat0: 76458 -> 78100 (+2.15%); split: -0.99%, +3.14%
Cat1: 82669 -> 84253 (+1.92%); split: -1.11%, +3.03%
Cat2: 89414 -> 89450 (+0.04%); split: -0.09%, +0.13%
Cat7: 8595 -> 8487 (-1.26%); split: -1.33%, +0.07%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36667>
This commit is contained in:
Job Noorman 2025-08-08 10:13:42 +02:00 committed by Marge Bot
parent 4e253184de
commit f46e2baeb3
3 changed files with 33 additions and 0 deletions

View file

@ -856,6 +856,7 @@ unsigned ir3_block_get_pred_index(struct ir3_block *block,
void ir3_calc_dominance(struct ir3 *ir);
bool ir3_block_dominates(struct ir3_block *a, struct ir3_block *b);
struct ir3_block *ir3_dominance_lca(struct ir3_block *b1, struct ir3_block *b2);
struct ir3_shader_variant;

View file

@ -105,3 +105,19 @@ ir3_block_dominates(struct ir3_block *a, struct ir3_block *b)
return a->dom_pre_index <= b->dom_pre_index &&
a->dom_post_index >= b->dom_post_index;
}
/**
 * Returns the least common ancestor of b1 and b2.
 *
 * A NULL argument is treated as "no block yet": when either argument is NULL
 * the other one is returned unchanged (so NULL is returned only when both are
 * NULL). This lets callers fold a sequence of blocks into a running LCA that
 * starts out as NULL. When both blocks are non-NULL the result is whatever
 * intersect() computes for them.
 */
struct ir3_block *
ir3_dominance_lca(struct ir3_block *b1, struct ir3_block *b2)
{
   /* With at most one real block there is nothing to intersect. */
   if (!b1 || !b2)
      return b1 ? b1 : b2;

   return intersect(b1, b2);
}

View file

@ -109,6 +109,13 @@ struct ra_spill_ctx {
*/
struct ir3_register *base_reg;
/* During spilling/reloading, we keep track of the least common ancestor of
* all spill/reload blocks and move base_reg there. This prevents using a GPR
* in the preamble, and hence disabling early preamble, if nothing is spilled
* there.
*/
struct ir3_block *base_reg_block;
/* Current pvtmem offset in bytes. */
unsigned spill_slot;
@ -757,6 +764,8 @@ spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
} else {
src->wrmask = reg->wrmask;
}
ctx->base_reg_block = ir3_dominance_lca(ctx->base_reg_block, spill->block);
}
static void
@ -927,6 +936,7 @@ reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
dst->merge_set_offset = reg->merge_set_offset;
dst->interval_start = reg->interval_start;
dst->interval_end = reg->interval_end;
ctx->base_reg_block = ir3_dominance_lca(ctx->base_reg_block, reload->block);
return dst;
}
@ -2164,6 +2174,12 @@ ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
ir3_create_parallel_copies(ir);
if (ctx->base_reg_block &&
ctx->base_reg_block != ctx->base_reg->instr->block) {
ir3_instr_move_after_phis(ctx->base_reg->instr, ctx->base_reg_block);
ctx->base_reg->instr->block = ctx->base_reg_block;
}
/* After this point, we're done mutating the IR. Liveness has been trashed,
* so recalculate it. We'll need it for recalculating the merge sets.
*/