diff --git a/.pick_status.json b/.pick_status.json index 744c546236a..7c4421605a8 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2272,7 +2272,7 @@ "description": "ir3/cp: ir3: Prevent propagating shared regs out of loops harder", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "590efd180be05817163d1b70990273b535a82afe" }, diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index e6c7d520d27..2c4d2891163 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -303,6 +303,22 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags) return valid_swap; } +/* Values that are uniform inside a loop can become divergent outside + * it if the loop has a divergent trip count. This means that we can't + * propagate a copy of a shared to non-shared register if it would + * make the shared reg's live range extend outside of its loop. Users + * outside the loop would see the value for the thread(s) that last + * exited the loop, rather than for their own thread. + */ +static bool +is_valid_shared_copy(struct ir3_instruction *dst_instr, + struct ir3_instruction *src_instr, + struct ir3_register *src_reg) +{ + return !(src_reg->flags & IR3_REG_SHARED) || + dst_instr->block->loop_id == src_instr->block->loop_id; +} + /** * Handle cp for a given src register. This additionally handles * the cases of collapsing immedate/const (which replace the src @@ -316,22 +332,14 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, { struct ir3_instruction *src = ssa(reg); - /* Values that are uniform inside a loop can become divergent outside - * it if the loop has a divergent trip count. This means that we can't - * propagate a copy of a shared to non-shared register if it would - * make the shared reg's live range extend outside of its loop. Users - * outside the loop would see the value for the thread(s) that last - * exited the loop, rather than for their own thread. - */ - if ((src->dsts[0]->flags & IR3_REG_SHARED) && - src->block->loop_id != instr->block->loop_id) - return false; - if (is_eligible_mov(src, instr, true)) { /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ struct ir3_register *src_reg = src->srcs[0]; unsigned new_flags = reg->flags; + if (!is_valid_shared_copy(instr, src, src_reg)) + return false; + combine_flags(&new_flags, src); if (ir3_valid_flags(instr, n, new_flags)) { @@ -357,6 +365,9 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, struct ir3_register *src_reg = src->srcs[0]; unsigned new_flags = reg->flags; + if (!is_valid_shared_copy(instr, src, src_reg)) + return false; + if (src_reg->flags & IR3_REG_ARRAY) return false;