ir3: add subreg move optimization
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Certain instructions essentially behave as a move of half of their full
src to their half dst. More specifically:
- `cov.u32u16 hdst, src`: moves lower half of src to hdst.
- `[a]shr.b hdst, src, 16`: moves upper half of src to hdst.

In mergedregs mode, if the src and dst of these instructions are
assigned overlapping registers, they can be removed.

Implement this by 1) merging the src and dst merge sets of such
instructions before RA, and 2) removing them if RA assigned overlapping
registers.

Totals from 7483 (4.55% of 164575) affected shaders:
Instrs: 8913039 -> 8859209 (-0.60%); split: -0.62%, +0.01%
CodeSize: 16588988 -> 16489082 (-0.60%); split: -0.61%, +0.00%
NOPs: 2020848 -> 2013070 (-0.38%); split: -0.71%, +0.33%
MOVs: 352179 -> 352146 (-0.01%); split: -0.06%, +0.05%
COVs: 256946 -> 242972 (-5.44%)
Full: 145737 -> 145738 (+0.00%)
(ss): 224816 -> 222102 (-1.21%); split: -1.24%, +0.03%
(sy): 109208 -> 109222 (+0.01%); split: -0.01%, +0.02%
(ss)-stall: 842387 -> 831457 (-1.30%); split: -1.63%, +0.33%
(sy)-stall: 3353188 -> 3337732 (-0.46%); split: -0.62%, +0.16%
Preamble Instrs: 1403333 -> 1401362 (-0.14%)
Cat0: 2219312 -> 2211530 (-0.35%); split: -0.65%, +0.30%
Cat1: 690367 -> 677240 (-1.90%); split: -1.99%, +0.09%
Cat2: 3279215 -> 3246293 (-1.00%)
Cat7: 412865 -> 412866 (+0.00%)

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35858>
This commit is contained in:
Job Noorman 2025-07-03 17:09:26 +02:00 committed by Marge Bot
parent 28e810eb74
commit c757b22c5f
4 changed files with 103 additions and 0 deletions

View file

@ -1905,3 +1905,44 @@ ir3_supports_rpt(struct ir3_compiler *compiler, unsigned opc)
return false; return false;
} }
} }
/* Returns true only when `src` has none of the HALF, CONST, IMMED or RELATIV
 * flags set and none of the FNEG, FABS, SNEG, SABS or BNOT flags set.
 */
static bool
is_unmodified_full_gpr(struct ir3_register *src)
{
   /* Anything flagged half, const, immediate or relative is rejected. */
   if (src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED |
                     IR3_REG_RELATIV)) {
      return false;
   }

   /* Likewise reject a src carrying any of the neg/abs/not flags. */
   if (src->flags & (IR3_REG_FNEG | IR3_REG_FABS | IR3_REG_SNEG |
                     IR3_REG_SABS | IR3_REG_BNOT)) {
      return false;
   }

   return true;
}
/* Classify `instr` as a move of one half of its full GPR src into its half
 * dst. If RA ends up assigning overlapping registers to src and dst, such an
 * instruction can be removed in mergedregs mode.
 *
 * Returns:
 *  - IR3_SUBREG_MOVE_LOWER for `cov.u32u16 hdst, src`,
 *  - IR3_SUBREG_MOVE_UPPER for `[a]shr.b hdst, src, 16`,
 *  - IR3_SUBREG_MOVE_NONE for everything else.
 *
 * In both matching cases src must pass is_unmodified_full_gpr() and src and
 * dst must agree on IR3_REG_SHARED.
 */
enum ir3_subreg_move
ir3_is_subreg_move(struct ir3_instruction *instr)
{
   switch (instr->opc) {
   case OPC_MOV: {
      /* `cov.u32u16 hdst, src`: the lower half of src ends up in hdst. */
      struct ir3_register *from = instr->srcs[0];
      struct ir3_register *to = instr->dsts[0];

      if (instr->cat1.src_type != TYPE_U32 ||
          instr->cat1.dst_type != TYPE_U16) {
         break;
      }
      if (!is_unmodified_full_gpr(from)) {
         break;
      }
      if ((from->flags & IR3_REG_SHARED) != (to->flags & IR3_REG_SHARED)) {
         break;
      }
      return IR3_SUBREG_MOVE_LOWER;
   }
   case OPC_SHR_B:
   case OPC_ASHR_B: {
      /* `[a]shr.b hdst, src, 16`: the upper half of src ends up in hdst. */
      struct ir3_register *from = instr->srcs[0];
      struct ir3_register *amount = instr->srcs[1];
      struct ir3_register *to = instr->dsts[0];

      if (!(to->flags & IR3_REG_HALF)) {
         break;
      }
      if (!is_unmodified_full_gpr(from)) {
         break;
      }
      if ((from->flags & IR3_REG_SHARED) != (to->flags & IR3_REG_SHARED)) {
         break;
      }
      /* The shift amount has to be the immediate 16. */
      if (!(amount->flags & IR3_REG_IMMED) || amount->uim_val != 16) {
         break;
      }
      return IR3_SUBREG_MOVE_UPPER;
   }
   default:
      break;
   }

   return IR3_SUBREG_MOVE_NONE;
}

View file

@ -1143,6 +1143,14 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr)
} }
} }
/* Result of ir3_is_subreg_move(): which half of its full GPR src, if any, an
 * instruction moves to its half dst.
 */
enum ir3_subreg_move {
/* The instruction is not a subreg move. */
IR3_SUBREG_MOVE_NONE,
/* Lower half of src is moved to dst (`cov.u32u16 hdst, src`). */
IR3_SUBREG_MOVE_LOWER,
/* Upper half of src is moved to dst (`[a]shr.b hdst, src, 16`). */
IR3_SUBREG_MOVE_UPPER,
};
/* Classifies `instr` as one of the enum ir3_subreg_move values. */
enum ir3_subreg_move ir3_is_subreg_move(struct ir3_instruction *instr);
static inline bool static inline bool
is_alu(struct ir3_instruction *instr) is_alu(struct ir3_instruction *instr)
{ {

View file

@ -20,6 +20,7 @@
#include "instr-a3xx.h" #include "instr-a3xx.h"
#include "ir3.h" #include "ir3.h"
#include "ir3_context.h" #include "ir3_context.h"
#include "ir3_ra.h"
static struct ir3_instruction_rpt static struct ir3_instruction_rpt
rpt_instr(struct ir3_instruction *instr, unsigned nrpt) rpt_instr(struct ir3_instruction *instr, unsigned nrpt)
@ -5515,6 +5516,44 @@ collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir)
} }
} }
/* Returns true when `instr` is a subreg move (see ir3_is_subreg_move()) whose
 * dst was assigned exactly the half of src that it copies, i.e. when
 * physreg(dst) == physreg(src) + offset, with offset 0 for a lower-half move
 * and 1 for an upper-half move.
 *
 * NOTE(review): presumably ra_num_to_physreg() yields a position counted in
 * half-register units, which is what makes "+ 1" select the upper half —
 * confirm against ir3_ra.h.
 */
static bool
is_noop_subreg_move(struct ir3_instruction *instr)
{
   unsigned half_offset;

   switch (ir3_is_subreg_move(instr)) {
   case IR3_SUBREG_MOVE_NONE:
      return false;
   case IR3_SUBREG_MOVE_LOWER:
      half_offset = 0;
      break;
   default:
      half_offset = 1;
      break;
   }

   struct ir3_register *from = instr->srcs[0];
   struct ir3_register *to = instr->dsts[0];

   if (ra_num_to_physreg(to->num, to->flags) ==
       ra_num_to_physreg(from->num, from->flags) + half_offset) {
      return true;
   }
   return false;
}
static bool
ir3_remove_noop_subreg_moves(struct ir3 *ir)
{
if (!ir->compiler->mergedregs) {
return false;
}
bool progress = false;
foreach_block (block, &ir->block_list) {
foreach_instr_safe (instr, &block->instr_list) {
if (is_noop_subreg_move(instr)) {
ir3_instr_remove(instr);
progress = true;
}
}
}
return progress;
}
int int
ir3_compile_shader_nir(struct ir3_compiler *compiler, ir3_compile_shader_nir(struct ir3_compiler *compiler,
struct ir3_shader *shader, struct ir3_shader *shader,
@ -5846,6 +5885,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
goto out; goto out;
} }
IR3_PASS(ir, ir3_remove_noop_subreg_moves);
IR3_PASS(ir, ir3_merge_rpt, so); IR3_PASS(ir, ir3_merge_rpt, so);
IR3_PASS(ir, ir3_postsched, so); IR3_PASS(ir, ir3_postsched, so);

View file

@ -384,6 +384,19 @@ aggressive_coalesce_collect(struct ir3_liveness *live,
} }
} }
static void
aggressive_coalesce_subreg_move(struct ir3_liveness *live,
struct ir3_instruction *instr)
{
enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr);
if (subreg_move != IR3_SUBREG_MOVE_NONE &&
(instr->dsts[0]->flags & IR3_REG_SSA)) {
unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
try_merge_defs(live, instr->srcs[0]->def, instr->dsts[0], offset);
}
}
static void static void
aggressive_coalesce_rpt(struct ir3_liveness *live, aggressive_coalesce_rpt(struct ir3_liveness *live,
struct ir3_instruction *instr) struct ir3_instruction *instr)
@ -605,6 +618,7 @@ ir3_aggressive_coalesce(struct ir3_liveness *live,
aggressive_coalesce_parallel_copy(live, instr); aggressive_coalesce_parallel_copy(live, instr);
break; break;
default: default:
aggressive_coalesce_subreg_move(live, instr);
break; break;
} }
} }