diff --git a/.pick_status.json b/.pick_status.json index 750a811bde0..84712fd0a95 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4094,7 +4094,7 @@ "description": "ir3: don't create merge sets for subreg moves", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "c757b22c5fac6bb7c1e363244c96c5843683ba82", "notes": null diff --git a/src/freedreno/ir3/ir3_merge_regs.c b/src/freedreno/ir3/ir3_merge_regs.c index cd4309387eb..dba68af0d37 100644 --- a/src/freedreno/ir3/ir3_merge_regs.c +++ b/src/freedreno/ir3/ir3_merge_regs.c @@ -384,19 +384,6 @@ aggressive_coalesce_collect(struct ir3_liveness *live, } } -static void -aggressive_coalesce_subreg_move(struct ir3_liveness *live, - struct ir3_instruction *instr) -{ - enum ir3_subreg_move subreg_move = ir3_is_subreg_move(instr); - - if (subreg_move != IR3_SUBREG_MOVE_NONE && - (instr->dsts[0]->flags & IR3_REG_SSA)) { - unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1; - try_merge_defs(live, instr->srcs[0]->def, instr->dsts[0], offset); - } -} - static void aggressive_coalesce_rpt(struct ir3_liveness *live, struct ir3_instruction *instr) @@ -618,7 +605,6 @@ ir3_aggressive_coalesce(struct ir3_liveness *live, aggressive_coalesce_parallel_copy(live, instr); break; default: - aggressive_coalesce_subreg_move(live, instr); break; } } diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index eddd9eb91cc..acc81d04a43 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -1382,6 +1382,34 @@ try_allocate_src(struct ra_ctx *ctx, struct ra_file *file, return ~0; } +static physreg_t +try_allocate_src_subreg(struct ra_ctx *ctx, struct ra_file *file, + struct ir3_register *reg, + enum ir3_subreg_move subreg_move) +{ + assert(subreg_move != IR3_SUBREG_MOVE_NONE); + + /* Subreg moves always write a half register. */ + assert(reg_elem_size(reg) == 1); + + struct ir3_register *src = reg->instr->srcs[0]; + if (!ra_reg_is_src(src) || ra_get_file(ctx, src) != file) + return ~0; + + unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 0 : 1; + struct ra_interval *src_interval = ra_interval_get(ctx, src->def); + physreg_t src_physreg = ra_interval_get_physreg(src_interval) + offset; + unsigned file_size = reg_file_size(file, reg); + unsigned size = reg_size(reg); + + if (src_physreg + size <= file_size && + get_reg_specified(ctx, file, reg, src_physreg, false)) { + return src_physreg; + } + + return ~0; +} + static bool rpt_has_unique_merge_set(struct ir3_instruction *instr) { @@ -1458,6 +1486,16 @@ get_reg(struct ra_ctx *ctx, struct ra_file *file, struct ir3_register *reg) } } + /* For subreg moves (see ir3_is_subreg_move), try to allocate half of their + * full src for their dst. If this succeeds, the instruction can be removed. + */ + enum ir3_subreg_move subreg_move = ir3_is_subreg_move(reg->instr); + if (subreg_move != IR3_SUBREG_MOVE_NONE) { + physreg_t src_reg = try_allocate_src_subreg(ctx, file, reg, subreg_move); + if (src_reg != (physreg_t)~0) + return src_reg; + } + /* For ALU and SFU instructions, if the src reg is avail to pick, use it. * Because this doesn't introduce unnecessary dependencies, and it * potentially avoids needing (ss) syncs for write after read hazards for