ir3: don't create merge sets for subreg moves
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

There are multiple places where RA assumes merge sets are either
all-full or all-half registers. Creating merge sets for subreg moves
mixes full and half registers which may lead to RA failures.

Fix this by not creating merge sets for subreg moves anymore. Instead,
we manually try to allocate  a subreg move's src for its dst when
selecting a register during RA, similar to how ALU/SFU instructions try
to reuse their srcs.

Totals:
Instrs: 363174291 -> 363175216 (+0.00%); split: -0.00%, +0.00%
CodeSize: 922975364 -> 922977230 (+0.00%); split: -0.00%, +0.00%
NOPs: 47652421 -> 47652444 (+0.00%); split: -0.00%, +0.00%
MOVs: 15652959 -> 15653065 (+0.00%); split: -0.00%, +0.00%
COVs: 4097203 -> 4097052 (-0.00%); split: -0.01%, +0.00%
(ss): 7806025 -> 7806183 (+0.00%); split: -0.00%, +0.00%
(sy): 3981862 -> 3981855 (-0.00%); split: -0.00%, +0.00%
(ss)-stall: 26612057 -> 26612789 (+0.00%); split: -0.00%, +0.00%
(sy)-stall: 111568786 -> 111568721 (-0.00%); split: -0.00%, +0.00%
STPs: 345796 -> 345792 (-0.00%)
LDPs: 191118 -> 191111 (-0.00%)
Preamble Instrs: 160491915 -> 160492355 (+0.00%); split: -0.00%, +0.00%
Last helper: 116587870 -> 116588273 (+0.00%); split: -0.00%, +0.00%
Cat0: 53288367 -> 53288384 (+0.00%); split: -0.00%, +0.00%
Cat1: 20954383 -> 20954336 (-0.00%); split: -0.00%, +0.00%
Cat2: 155294307 -> 155295252 (+0.00%); split: -0.00%, +0.00%
Cat6: 4623070 -> 4623059 (-0.00%)
Cat7: 9302363 -> 9302384 (+0.00%); split: -0.00%, +0.00%

Totals from 979 (0.07% of 1352016) affected shaders:
Instrs: 1324850 -> 1325775 (+0.07%); split: -0.07%, +0.14%
CodeSize: 2596114 -> 2597980 (+0.07%); split: -0.04%, +0.11%
NOPs: 330197 -> 330220 (+0.01%); split: -0.23%, +0.24%
MOVs: 62592 -> 62698 (+0.17%); split: -0.35%, +0.52%
COVs: 49011 -> 48860 (-0.31%); split: -0.62%, +0.31%
(ss): 35671 -> 35829 (+0.44%); split: -0.28%, +0.73%
(sy): 18936 -> 18929 (-0.04%); split: -0.13%, +0.09%
(ss)-stall: 157929 -> 158661 (+0.46%); split: -0.36%, +0.82%
(sy)-stall: 543371 -> 543306 (-0.01%); split: -0.20%, +0.19%
STPs: 2741 -> 2737 (-0.15%)
LDPs: 3022 -> 3015 (-0.23%)
Preamble Instrs: 322588 -> 323028 (+0.14%); split: -0.01%, +0.14%
Last helper: 298996 -> 299399 (+0.13%); split: -0.05%, +0.19%
Cat0: 361575 -> 361592 (+0.00%); split: -0.21%, +0.22%
Cat1: 111733 -> 111686 (-0.04%); split: -0.45%, +0.41%
Cat2: 487366 -> 488311 (+0.19%); split: -0.04%, +0.23%
Cat6: 21239 -> 21228 (-0.05%)
Cat7: 37170 -> 37191 (+0.06%); split: -0.06%, +0.12%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Fixes: c757b22c5f ("ir3: add subreg move optimization")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37368>
This commit is contained in:
Job Noorman 2025-09-15 12:30:58 +02:00 committed by Marge Bot
parent efc2bb64ce
commit 563b0b347a
2 changed files with 38 additions and 14 deletions

View file

@ -384,19 +384,6 @@ aggressive_coalesce_collect(struct ir3_liveness *live,
}
}
static void
aggressive_coalesce_subreg_move(struct ir3_liveness *live,
struct ir3_instruction *instr)
{
enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr);
if (subreg_move != IR3_SUBREG_MOVE_NONE &&
(instr->dsts[0]->flags & IR3_REG_SSA)) {
unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
try_merge_defs(live, instr->srcs[0]->def, instr->dsts[0], offset);
}
}
static void
aggressive_coalesce_rpt(struct ir3_liveness *live,
struct ir3_instruction *instr)
@ -618,7 +605,6 @@ ir3_aggressive_coalesce(struct ir3_liveness *live,
aggressive_coalesce_parallel_copy(live, instr);
break;
default:
aggressive_coalesce_subreg_move(live, instr);
break;
}
}

View file

@ -1384,6 +1384,34 @@ try_allocate_src(struct ra_ctx *ctx, struct ra_file *file,
return ~0;
}
static physreg_t
try_allocate_src_subreg(struct ra_ctx *ctx, struct ra_file *file,
struct ir3_register *reg,
enum ir3_subreg_move subreg_move)
{
assert(subreg_move != IR3_SUBREG_MOVE_NONE);
/* Subreg moves always write a half register. */
assert(reg_elem_size(reg) == 1);
struct ir3_register *src = reg->instr->srcs[0];
if (!ra_reg_is_src(src) || ra_get_file(ctx, src) != file)
return ~0;
unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
struct ra_interval *src_interval = ra_interval_get(ctx, src->def);
physreg_t src_physreg = ra_interval_get_physreg(src_interval) + offset;
unsigned file_size = reg_file_size(file, reg);
unsigned size = reg_size(reg);
if (src_physreg + size <= file_size &&
get_reg_specified(ctx, file, reg, src_physreg, false)) {
return src_physreg;
}
return ~0;
}
static bool
rpt_has_unique_merge_set(struct ir3_instruction *instr)
{
@ -1460,6 +1488,16 @@ get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg)
}
}
/* For subreg moves (see ir3_is_subreg_move), try to allocate half of their
* full src for their dst. If this succeeds, the instruction can be removed.
*/
enum ir3_subreg_move subreg_move = ir3_is_subreg_move(reg->instr);
if (subreg_move != IR3_SUBREG_MOVE_NONE) {
physreg_t src_reg = try_allocate_src_subreg(ctx, file, reg, subreg_move);
if (src_reg != (physreg_t)~0)
return src_reg;
}
/* For ALU and SFU instructions, if the src reg is avail to pick, use it.
* Because this doesn't introduce unnecessary dependencies, and it
* potentially avoids needing (ss) syncs for write after read hazards for