From c757b22c5fac6bb7c1e363244c96c5843683ba82 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 3 Jul 2025 17:09:26 +0200 Subject: [PATCH] ir3: add subreg move optimization Certain instructions essentially behave as a move of half of their full src to their half dst. More specifically: - `cov.u32u16 hdst, src`: moves lower half of src to hdst. - `[a]shr.b hdst, src, 16`: moves upper half of src to hdst. In mergedregs mode, if the src and dst of these instructions are assigned overlapping registers, they can be removed. Implement this by 1) merging the src and dst merge sets of such instructions before RA, and 2) removing them if RA assigned overlapping registers. Totals from 7483 (4.55% of 164575) affected shaders: Instrs: 8913039 -> 8859209 (-0.60%); split: -0.62%, +0.01% CodeSize: 16588988 -> 16489082 (-0.60%); split: -0.61%, +0.00% NOPs: 2020848 -> 2013070 (-0.38%); split: -0.71%, +0.33% MOVs: 352179 -> 352146 (-0.01%); split: -0.06%, +0.05% COVs: 256946 -> 242972 (-5.44%) Full: 145737 -> 145738 (+0.00%) (ss): 224816 -> 222102 (-1.21%); split: -1.24%, +0.03% (sy): 109208 -> 109222 (+0.01%); split: -0.01%, +0.02% (ss)-stall: 842387 -> 831457 (-1.30%); split: -1.63%, +0.33% (sy)-stall: 3353188 -> 3337732 (-0.46%); split: -0.62%, +0.16% Preamble Instrs: 1403333 -> 1401362 (-0.14%) Cat0: 2219312 -> 2211530 (-0.35%); split: -0.65%, +0.30% Cat1: 690367 -> 677240 (-1.90%); split: -1.99%, +0.09% Cat2: 3279215 -> 3246293 (-1.00%) Cat7: 412865 -> 412866 (+0.00%) Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3.c | 41 ++++++++++++++++++++++++++++ src/freedreno/ir3/ir3.h | 8 ++++++ src/freedreno/ir3/ir3_compiler_nir.c | 40 +++++++++++++++++++++++++++ src/freedreno/ir3/ir3_merge_regs.c | 14 ++++++++++ 4 files changed, 103 insertions(+) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 6d6dbe70ff9..b175e338c51 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1905,3 +1905,44 @@ ir3_supports_rpt(struct ir3_compiler 
*compiler, unsigned opc) return false; } } + +static bool +is_unmodified_full_gpr(struct ir3_register *src) +{ + return !(src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED | + IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)); /* true iff src has none of these flags set */ +} + +/* Does `instr` move half of its full GPR src to its half dst? If this is the + * case, and RA assigns overlapping registers to src and dst, the instruction + * can be removed in mergedregs mode. Returns the half moved (NONE if none). + */ +enum ir3_subreg_move +ir3_is_subreg_move(struct ir3_instruction *instr) +{ + if (instr->opc == OPC_MOV) { + /* `cov.u32u16 hdst, src`: moves lower half of src to hdst. */ + struct ir3_register *src = instr->srcs[0]; + struct ir3_register *dst = instr->dsts[0]; + + if (instr->cat1.src_type == TYPE_U32 && + instr->cat1.dst_type == TYPE_U16 && is_unmodified_full_gpr(src) && + (src->flags & IR3_REG_SHARED) == (dst->flags & IR3_REG_SHARED)) { + return IR3_SUBREG_MOVE_LOWER; + } + } else if (instr->opc == OPC_SHR_B || instr->opc == OPC_ASHR_B) { + /* `[a]shr.b hdst, src, 16`: moves upper half of src to hdst. 
*/ + struct ir3_register *src = instr->srcs[0]; + struct ir3_register *shamt = instr->srcs[1]; + struct ir3_register *dst = instr->dsts[0]; + + if ((dst->flags & IR3_REG_HALF) && is_unmodified_full_gpr(src) && + ((src->flags & IR3_REG_SHARED) == (dst->flags & IR3_REG_SHARED)) && + (shamt->flags & IR3_REG_IMMED) && shamt->uim_val == 16) { + return IR3_SUBREG_MOVE_UPPER; + } + } + + return IR3_SUBREG_MOVE_NONE; +} diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index aa7e29165ed..b46000354c5 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1143,6 +1143,14 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr) } } +enum ir3_subreg_move { + IR3_SUBREG_MOVE_NONE, /* not a subreg move */ + IR3_SUBREG_MOVE_LOWER, /* hdst = lower half of src */ + IR3_SUBREG_MOVE_UPPER, /* hdst = upper half of src */ +}; + +enum ir3_subreg_move ir3_is_subreg_move(struct ir3_instruction *instr); + static inline bool is_alu(struct ir3_instruction *instr) { diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index c901653281b..378a874cbfa 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -20,6 +20,7 @@ #include "instr-a3xx.h" #include "ir3.h" #include "ir3_context.h" +#include "ir3_ra.h" static struct ir3_instruction_rpt rpt_instr(struct ir3_instruction *instr, unsigned nrpt) @@ -5515,6 +5516,44 @@ collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir) } } +static bool +is_noop_subreg_move(struct ir3_instruction *instr) +{ + enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr); + + if (subreg_move == IR3_SUBREG_MOVE_NONE) { + return false; + } + + struct ir3_register *src = instr->srcs[0]; + struct ir3_register *dst = instr->dsts[0]; + unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 
0 : 1; /* physreg distance dst must have from src */ + + return ra_num_to_physreg(dst->num, dst->flags) == + ra_num_to_physreg(src->num, src->flags) + offset; +} + +static bool +ir3_remove_noop_subreg_moves(struct ir3 *ir) +{ + if (!ir->compiler->mergedregs) { /* nothing is removed without mergedregs */ + return false; + } + + bool progress = false; + + foreach_block (block, &ir->block_list) { + foreach_instr_safe (instr, &block->instr_list) { + if (is_noop_subreg_move(instr)) { + ir3_instr_remove(instr); + progress = true; + } + } + } + + return progress; +} + int ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader *shader, @@ -5846,6 +5885,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, goto out; } + IR3_PASS(ir, ir3_remove_noop_subreg_moves); IR3_PASS(ir, ir3_merge_rpt, so); IR3_PASS(ir, ir3_postsched, so); diff --git a/src/freedreno/ir3/ir3_merge_regs.c b/src/freedreno/ir3/ir3_merge_regs.c index dba68af0d37..cd4309387eb 100644 --- a/src/freedreno/ir3/ir3_merge_regs.c +++ b/src/freedreno/ir3/ir3_merge_regs.c @@ -384,6 +384,19 @@ aggressive_coalesce_collect(struct ir3_liveness *live, } } +static void +aggressive_coalesce_subreg_move(struct ir3_liveness *live, + struct ir3_instruction *instr) +{ + enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr); + + if (subreg_move != IR3_SUBREG_MOVE_NONE && + (instr->dsts[0]->flags & IR3_REG_SSA)) { /* NOTE(review): only dst is checked for SSA; src def assumed valid - confirm */ + unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1; + try_merge_defs(live, instr->srcs[0]->def, instr->dsts[0], offset); + } +} + static void aggressive_coalesce_rpt(struct ir3_liveness *live, struct ir3_instruction *instr) @@ -605,6 +618,7 @@ ir3_aggressive_coalesce(struct ir3_liveness *live, aggressive_coalesce_parallel_copy(live, instr); break; default: + aggressive_coalesce_subreg_move(live, instr); break; } }