diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 0c3b54d7ba9..6e4e81270a2 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -2341,6 +2341,20 @@ regmask_set(regmask_t *regmask, struct ir3_register *reg)
    }
 }
 
+static inline void
+regmask_clear(regmask_t *regmask, struct ir3_register *reg)
+{ /* Calls __regmask_clear() on regmask for every register number that reg covers. */
+   bool half = reg->flags & IR3_REG_HALF; /* forwarded unchanged to __regmask_clear() */
+   if (reg->flags & IR3_REG_RELATIV) { /* relative access: all reg->size entries starting at reg->array.base */
+      for (unsigned i = 0; i < reg->size; i++)
+         __regmask_clear(regmask, half, reg->array.base + i);
+   } else { /* otherwise: bit i set in reg->wrmask => entry reg->num + i */
+      for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
+         if (mask & 1)
+            __regmask_clear(regmask, half, n);
+   }
+}
+
 static inline bool
 regmask_get(regmask_t *regmask, struct ir3_register *reg)
 {
diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c
index 8a5a57dc756..f4a748cc3eb 100644
--- a/src/freedreno/ir3/ir3_delay.c
+++ b/src/freedreno/ir3/ir3_delay.c
@@ -237,8 +237,11 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner,
 static unsigned
 delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
                   struct ir3_instruction *consumer, unsigned distance,
-                  bool soft, bool pred, bool mergedregs)
+                  bool soft, bool pred, regmask_t *in_mask, bool mergedregs)
 {
+   regmask_t mask; /* private copy: clears made during this search never modify the caller's *in_mask */
+   memcpy(&mask, in_mask, sizeof(mask));
+
    unsigned delay = 0;
    /* Search backwards starting at the instruction before start, unless it's
     * NULL then search backwards from the block end.
@@ -261,6 +264,8 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
       foreach_dst_n (dst, dst_n, assigner) {
          if (dst->wrmask == 0)
             continue;
+         if (!regmask_get(&mask, dst)) /* skip dsts for which regmask_get() reports nothing set in mask */
+            continue;
          foreach_src_n (src, src_n, consumer) {
             if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
                continue;
@@ -269,6 +274,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
                assigner, consumer, dst_n, src_n, soft, mergedregs);
             new_delay = MAX2(new_delay, src_delay);
          }
+         regmask_clear(&mask, dst); /* dst is accounted for; assigners visited after this one can no longer match it via the regmask_get() check */
       }
 
       new_delay = new_delay > distance ? new_delay - distance : 0;
@@ -298,7 +304,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
       for (unsigned i = 0; i < block->predecessors_count; i++) {
          struct ir3_block *pred = block->predecessors[i];
          unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance,
-                                                 soft, pred, mergedregs);
+                                                 soft, pred, &mask, mergedregs); /* predecessors get this block's remaining mask. NOTE(review): the 'pred' in the bool slot is the block pointer declared above (shadows the bool parameter), so it converts to true when non-NULL; confirm that is intended */
          delay = MAX2(delay, pred_delay);
       }
 
@@ -323,7 +329,14 @@ unsigned
 ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
                       bool soft, bool mergedregs)
 {
-   return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
+   regmask_t mask; /* seeded below with every source of instr that is neither immediate nor const */
+   regmask_init(&mask, mergedregs);
+   foreach_src (src, instr) {
+      if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
+         regmask_set(&mask, src);
+   }
+
+   return delay_calc_postra(block, NULL, instr, 0, soft, false, &mask, mergedregs);
 }
 
 /**
@@ -334,7 +347,14 @@ unsigned
 ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
                      bool mergedregs)
 {
-   return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
+   regmask_t mask; /* same seeding as in ir3_delay_calc_postra(); NOTE(review): duplicated code, candidate for a shared static helper */
+   regmask_init(&mask, mergedregs);
+   foreach_src (src, instr) {
+      if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
+         regmask_set(&mask, src);
+   }
+
+   return delay_calc_postra(block, NULL, instr, 0, false, true, &mask, mergedregs);
 }
 
 /**
diff --git a/src/freedreno/ir3/tests/delay.c b/src/freedreno/ir3/tests/delay.c
index
018ade53a96..4f8e072ef6b 100644
--- a/src/freedreno/ir3/tests/delay.c
+++ b/src/freedreno/ir3/tests/delay.c
@@ -61,6 +61,11 @@ static const struct test {
       mov.f32f32 r0.z, c0.z
       mad.f32 r0.x, r0.x, r0.y, r0.z
    ),
+   TEST(0, /* r0.x is written by the mov and again by the rcp before add.f reads it; presumably the leading 0 is the expected delay (confirm against the TEST macro) */
+      mov.f32f32 r0.x, c0.x
+      rcp r0.x, r0.y
+      add.f r0.x, r0.x, c0.x
+   ),
    TEST(2,
       mov.f32f32 r0.x, c0.x
       mov.f32f32 r0.y, c0.y