mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
ir3: don't create merge sets for subreg moves
There are multiple places where register allocation (RA) assumes that a
merge set contains either only full or only half registers. Creating merge
sets for subreg moves mixes full and half registers in one set, which may
lead to RA failures.
Fix this by not creating merge sets for subreg moves anymore. Instead,
we manually try to allocate a subreg move's src for its dst when
selecting a register during RA, similar to how ALU/SFU instructions try
to reuse their srcs.
Totals:
Instrs: 363174291 -> 363175216 (+0.00%); split: -0.00%, +0.00%
CodeSize: 922975364 -> 922977230 (+0.00%); split: -0.00%, +0.00%
NOPs: 47652421 -> 47652444 (+0.00%); split: -0.00%, +0.00%
MOVs: 15652959 -> 15653065 (+0.00%); split: -0.00%, +0.00%
COVs: 4097203 -> 4097052 (-0.00%); split: -0.01%, +0.00%
(ss): 7806025 -> 7806183 (+0.00%); split: -0.00%, +0.00%
(sy): 3981862 -> 3981855 (-0.00%); split: -0.00%, +0.00%
(ss)-stall: 26612057 -> 26612789 (+0.00%); split: -0.00%, +0.00%
(sy)-stall: 111568786 -> 111568721 (-0.00%); split: -0.00%, +0.00%
STPs: 345796 -> 345792 (-0.00%)
LDPs: 191118 -> 191111 (-0.00%)
Preamble Instrs: 160491915 -> 160492355 (+0.00%); split: -0.00%, +0.00%
Last helper: 116587870 -> 116588273 (+0.00%); split: -0.00%, +0.00%
Cat0: 53288367 -> 53288384 (+0.00%); split: -0.00%, +0.00%
Cat1: 20954383 -> 20954336 (-0.00%); split: -0.00%, +0.00%
Cat2: 155294307 -> 155295252 (+0.00%); split: -0.00%, +0.00%
Cat6: 4623070 -> 4623059 (-0.00%)
Cat7: 9302363 -> 9302384 (+0.00%); split: -0.00%, +0.00%
Totals from 979 (0.07% of 1352016) affected shaders:
Instrs: 1324850 -> 1325775 (+0.07%); split: -0.07%, +0.14%
CodeSize: 2596114 -> 2597980 (+0.07%); split: -0.04%, +0.11%
NOPs: 330197 -> 330220 (+0.01%); split: -0.23%, +0.24%
MOVs: 62592 -> 62698 (+0.17%); split: -0.35%, +0.52%
COVs: 49011 -> 48860 (-0.31%); split: -0.62%, +0.31%
(ss): 35671 -> 35829 (+0.44%); split: -0.28%, +0.73%
(sy): 18936 -> 18929 (-0.04%); split: -0.13%, +0.09%
(ss)-stall: 157929 -> 158661 (+0.46%); split: -0.36%, +0.82%
(sy)-stall: 543371 -> 543306 (-0.01%); split: -0.20%, +0.19%
STPs: 2741 -> 2737 (-0.15%)
LDPs: 3022 -> 3015 (-0.23%)
Preamble Instrs: 322588 -> 323028 (+0.14%); split: -0.01%, +0.14%
Last helper: 298996 -> 299399 (+0.13%); split: -0.05%, +0.19%
Cat0: 361575 -> 361592 (+0.00%); split: -0.21%, +0.22%
Cat1: 111733 -> 111686 (-0.04%); split: -0.45%, +0.41%
Cat2: 487366 -> 488311 (+0.19%); split: -0.04%, +0.23%
Cat6: 21239 -> 21228 (-0.05%)
Cat7: 37170 -> 37191 (+0.06%); split: -0.06%, +0.12%
Signed-off-by: Job Noorman <jnoorman@igalia.com>
Fixes: c757b22c5f ("ir3: add subreg move optimization")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37368>
This commit is contained in:
parent
efc2bb64ce
commit
563b0b347a
2 changed files with 38 additions and 14 deletions
|
|
@ -384,19 +384,6 @@ aggressive_coalesce_collect(struct ir3_liveness *live,
|
|||
}
|
||||
}
|
||||
|
||||
/* Try to coalesce the source and destination of a subreg move.
 *
 * If ir3_is_subreg_move() classifies `instr` as a subreg move and its first
 * destination is an SSA register, the source's def and the destination are
 * handed to try_merge_defs() so they may end up in one merge set.
 *
 * Per the commit message, this is the helper the change removes: merging a
 * full source with a half destination produces merge sets that mix full and
 * half registers, which RA does not expect.
 *
 * live:  liveness information, forwarded unchanged to try_merge_defs().
 * instr: the instruction to inspect; left alone unless it is a subreg move
 *        with an SSA destination.
 */
static void
|
||||
aggressive_coalesce_subreg_move(struct ir3_liveness *live,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr);
|
||||
|
||||
/* Only SSA destinations of actual subreg moves are considered. */
if (subreg_move != IR3_SUBREG_MOVE_NONE &&
|
||||
(instr->dsts[0]->flags & IR3_REG_SSA)) {
|
||||
/* Offset 0 for a lower subreg move, 1 for any other subreg move kind.
 * NOTE(review): presumably the destination's offset within the source
 * in half-register units -- confirm against try_merge_defs().
 */
unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
|
||||
try_merge_defs(live, instr->srcs[0]->def, instr->dsts[0], offset);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
aggressive_coalesce_rpt(struct ir3_liveness *live,
|
||||
struct ir3_instruction *instr)
|
||||
|
|
@ -618,7 +605,6 @@ ir3_aggressive_coalesce(struct ir3_liveness *live,
|
|||
aggressive_coalesce_parallel_copy(live, instr);
|
||||
break;
|
||||
default:
|
||||
aggressive_coalesce_subreg_move(live, instr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1384,6 +1384,34 @@ try_allocate_src(struct ra_ctx *ctx, struct ra_file *file,
|
|||
return ~0;
|
||||
}
|
||||
|
||||
/* Try to give the destination of a subreg move the physical register that
 * already holds the selected half of its source.
 *
 * ctx:         RA context, used to look up the source's file and interval.
 * file:        register file the destination is being allocated from.
 * reg:         destination register of the subreg move; asserted to have an
 *              element size of 1.
 * subreg_move: which kind of subreg move reg->instr is; must not be
 *              IR3_SUBREG_MOVE_NONE.
 *
 * Returns the chosen physreg on success, or ~0 when the source's register
 * cannot be reused.
 */
static physreg_t
|
||||
try_allocate_src_subreg(struct ra_ctx *ctx, struct ra_file *file,
|
||||
struct ir3_register *reg,
|
||||
enum ir3_subreg_move subreg_move)
|
||||
{
|
||||
assert(subreg_move != IR3_SUBREG_MOVE_NONE);
|
||||
|
||||
/* Subreg moves always write a half register. */
|
||||
assert(reg_elem_size(reg) == 1);
|
||||
|
||||
struct ir3_register *src = reg->instr->srcs[0];
|
||||
/* Give up unless ra_reg_is_src() accepts the source and the source lives in
 * the same register file the destination is being allocated from.
 */
if (!ra_reg_is_src(src) || ra_get_file(ctx, src) != file)
|
||||
return ~0;
|
||||
|
||||
/* Offset 0 for a lower subreg move, 1 for any other subreg move kind; it is
 * added to the source's physreg to get the candidate register.
 */
unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
|
||||
struct ra_interval *src_interval = ra_interval_get(ctx, src->def);
|
||||
physreg_t src_physreg = ra_interval_get_physreg(src_interval) + offset;
|
||||
unsigned file_size = reg_file_size(file, reg);
|
||||
unsigned size = reg_size(reg);
|
||||
|
||||
/* The candidate must fit inside the file and be accepted by
 * get_reg_specified().
 * NOTE(review): the meaning of the trailing `false` argument is not visible
 * in this hunk -- confirm against get_reg_specified().
 */
if (src_physreg + size <= file_size &&
|
||||
get_reg_specified(ctx, file, reg, src_physreg, false)) {
|
||||
return src_physreg;
|
||||
}
|
||||
|
||||
return ~0;
|
||||
}
|
||||
|
||||
static bool
|
||||
rpt_has_unique_merge_set(struct ir3_instruction *instr)
|
||||
{
|
||||
|
|
@ -1460,6 +1488,16 @@ get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg)
|
|||
}
|
||||
}
|
||||
|
||||
/* For subreg moves (see ir3_is_subreg_move), try to allocate half of their
|
||||
* full src for their dst. If this succeeds, the instruction can be removed.
|
||||
*/
|
||||
enum ir3_subreg_move subreg_move = ir3_is_subreg_move(reg->instr);
|
||||
if (subreg_move != IR3_SUBREG_MOVE_NONE) {
|
||||
physreg_t src_reg = try_allocate_src_subreg(ctx, file, reg, subreg_move);
|
||||
if (src_reg != (physreg_t)~0)
|
||||
return src_reg;
|
||||
}
|
||||
|
||||
/* For ALU and SFU instructions, if the src reg is avail to pick, use it.
|
||||
* Because this doesn't introduce unnecessary dependencies, and it
|
||||
* potentially avoids needing (ss) syncs for write after read hazards for
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue