diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 6d6dbe70ff9..b175e338c51 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -1905,3 +1905,52 @@ ir3_supports_rpt(struct ir3_compiler *compiler, unsigned opc)
       return false;
    }
 }
+
+/* Returns true if none of the HALF, CONST, IMMED, RELATIV or source-modifier
+ * (FNEG/FABS/SNEG/SABS/BNOT) flags are set on `src`.
+ */
+static bool
+is_unmodified_full_gpr(struct ir3_register *src)
+{
+   return !(src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED |
+                          IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS |
+                          IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT));
+}
+
+/* Does `instr` move half of its full GPR src to its half dst? If this is the
+ * case, and RA assigns overlapping registers to src and dst, the instruction
+ * can be removed in mergedregs mode.
+ *
+ * Returns IR3_SUBREG_MOVE_LOWER for a u32->u16 OPC_MOV, IR3_SUBREG_MOVE_UPPER
+ * for OPC_SHR_B/OPC_ASHR_B with an immediate shift of 16 and a half dst, and
+ * IR3_SUBREG_MOVE_NONE otherwise. In the first two cases src must be an
+ * unmodified full GPR with the same IR3_REG_SHARED flag as dst.
+ */
+enum ir3_subreg_move
+ir3_is_subreg_move(struct ir3_instruction *instr)
+{
+   if (instr->opc == OPC_MOV) {
+      /* `cov.u32u16 hdst, src`: moves lower half of src to hdst. */
+      struct ir3_register *src = instr->srcs[0];
+      struct ir3_register *dst = instr->dsts[0];
+
+      if (instr->cat1.src_type == TYPE_U32 &&
+          instr->cat1.dst_type == TYPE_U16 && is_unmodified_full_gpr(src) &&
+          (src->flags & IR3_REG_SHARED) == (dst->flags & IR3_REG_SHARED)) {
+         return IR3_SUBREG_MOVE_LOWER;
+      }
+   } else if (instr->opc == OPC_SHR_B || instr->opc == OPC_ASHR_B) {
+      /* `[a]shr.b hdst, src, 16`: moves upper half of src to hdst. */
+      struct ir3_register *src = instr->srcs[0];
+      struct ir3_register *shamt = instr->srcs[1];
+      struct ir3_register *dst = instr->dsts[0];
+
+      if ((dst->flags & IR3_REG_HALF) && is_unmodified_full_gpr(src) &&
+          ((src->flags & IR3_REG_SHARED) == (dst->flags & IR3_REG_SHARED)) &&
+          (shamt->flags & IR3_REG_IMMED) && shamt->uim_val == 16) {
+         return IR3_SUBREG_MOVE_UPPER;
+      }
+   }
+
+   return IR3_SUBREG_MOVE_NONE;
+}
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index aa7e29165ed..b46000354c5 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1143,6 +1143,17 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr)
    }
 }
 
+/* Classification returned by ir3_is_subreg_move(): whether an instruction
+ * copies the lower or upper half of its full src to its half dst, or neither.
+ */
+enum ir3_subreg_move {
+   IR3_SUBREG_MOVE_NONE,
+   IR3_SUBREG_MOVE_LOWER,
+   IR3_SUBREG_MOVE_UPPER,
+};
+
+enum ir3_subreg_move ir3_is_subreg_move(struct ir3_instruction *instr);
+
 static inline bool
 is_alu(struct ir3_instruction *instr)
 {
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index c901653281b..378a874cbfa 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -20,6 +20,7 @@
 #include "instr-a3xx.h"
 #include "ir3.h"
 #include "ir3_context.h"
+#include "ir3_ra.h"
 
 static struct ir3_instruction_rpt
 rpt_instr(struct ir3_instruction *instr, unsigned nrpt)
@@ -5515,6 +5516,53 @@ collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir)
    }
 }
 
+/* Returns true if `instr` is a subreg move (see ir3_is_subreg_move()) whose
+ * dst was assigned exactly the half of the src register that it copies, which
+ * makes the instruction a no-op. Compares the registers' `num` fields via
+ * ra_num_to_physreg(), so registers are expected to be assigned already.
+ */
+static bool
+is_noop_subreg_move(struct ir3_instruction *instr)
+{
+   enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr);
+
+   if (subreg_move == IR3_SUBREG_MOVE_NONE) {
+      return false;
+   }
+
+   struct ir3_register *src = instr->srcs[0];
+   struct ir3_register *dst = instr->dsts[0];
+   unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
+
+   return ra_num_to_physreg(dst->num, dst->flags) ==
+          ra_num_to_physreg(src->num, src->flags) + offset;
+}
+
+/* Removes all no-op subreg moves (see is_noop_subreg_move()) from `ir`. Only
+ * does anything when the compiler uses mergedregs. Returns true if at least
+ * one instruction was removed.
+ */
+static bool
+ir3_remove_noop_subreg_moves(struct ir3 *ir)
+{
+   if (!ir->compiler->mergedregs) {
+      return false;
+   }
+
+   bool progress = false;
+
+   foreach_block (block, &ir->block_list) {
+      foreach_instr_safe (instr, &block->instr_list) {
+         if (is_noop_subreg_move(instr)) {
+            ir3_instr_remove(instr);
+            progress = true;
+         }
+      }
+   }
+
+   return progress;
+}
+
 int
 ir3_compile_shader_nir(struct ir3_compiler *compiler,
                        struct ir3_shader *shader,
@@ -5846,6 +5894,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
       goto out;
    }
 
+   IR3_PASS(ir, ir3_remove_noop_subreg_moves);
    IR3_PASS(ir, ir3_merge_rpt, so);
    IR3_PASS(ir, ir3_postsched, so);
 
diff --git a/src/freedreno/ir3/ir3_merge_regs.c b/src/freedreno/ir3/ir3_merge_regs.c
index dba68af0d37..cd4309387eb 100644
--- a/src/freedreno/ir3/ir3_merge_regs.c
+++ b/src/freedreno/ir3/ir3_merge_regs.c
@@ -384,6 +384,34 @@ aggressive_coalesce_collect(struct ir3_liveness *live,
    }
 }
 
+/* If `instr` is a subreg move (see ir3_is_subreg_move()) whose src and dst are
+ * both SSA, try to merge the src def with the dst at the offset of the half
+ * being moved (0 for lower, 1 for upper).
+ */
+static void
+aggressive_coalesce_subreg_move(struct ir3_liveness *live,
+                                struct ir3_instruction *instr)
+{
+   enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr);
+
+   if (subreg_move == IR3_SUBREG_MOVE_NONE) {
+      return;
+   }
+
+   struct ir3_register *src = instr->srcs[0];
+   struct ir3_register *dst = instr->dsts[0];
+
+   /* NOTE(review): `def` is assumed to be valid only for SSA srcs, so skip
+    * anything else instead of handing a possibly-NULL def to try_merge_defs().
+    */
+   if (!(src->flags & IR3_REG_SSA) || !(dst->flags & IR3_REG_SSA)) {
+      return;
+   }
+
+   unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1;
+   try_merge_defs(live, src->def, dst, offset);
+}
+
 static void
 aggressive_coalesce_rpt(struct ir3_liveness *live,
                         struct ir3_instruction *instr)
@@ -605,6 +633,7 @@ ir3_aggressive_coalesce(struct ir3_liveness *live,
             aggressive_coalesce_parallel_copy(live, instr);
             break;
          default:
+            aggressive_coalesce_subreg_move(live, instr);
             break;
          }
       }