ir3/sched: Convert to srcs/dsts arrays

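Also change the indexing in ir3_delayslots so that it's finally sane: the src number n passed in is now simply a 0-based index into the consumer's srcs array (e.g. the 3rd src of a cat3 is n == 2), and callers no longer have to add one to it. To do this we also have to change foreach_ssa_src_n to index srcs instead of regs, so that the indexing stays in sync. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11469>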
2025-12-24 15:20:10 +01:00 · 2021-06-18 16:13:34 +02:00 · 2021-06-18 16:13:34 +02:00 · 50994eeabf
commit 50994eeabf
parent 132dfacdcb
4 changed files with 34 additions and 40 deletions
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@ -1356,12 +1356,12 @@ ir3_try_swap_signedness(opc_t opc, bool *can_swap)

 static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
 {
-	return instr->regs_count + instr->deps_count;
+	return instr->srcs_count + instr->deps_count;
 }

 static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
 {
-	if (n >= instr->regs_count)
+	if (n >= instr->srcs_count)
 		return true;
 	return false;
 }
@ -1370,9 +1370,9 @@ static inline struct ir3_instruction **
 __ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
 {
 	if (__is_false_dep(instr, n))
-		return &instr->deps[n - instr->regs_count];
-	if (ssa(instr->regs[n]))
-		return &instr->regs[n]->def->instr;
+		return &instr->deps[n - instr->srcs_count];
+	if (ssa(instr->srcs[n]))
+		return &instr->srcs[n]->def->instr;
 	return NULL;
 }

--- a/src/freedreno/ir3/ir3_delay.c
+++ b/src/freedreno/ir3/ir3_delay.c
@ -95,19 +95,16 @@ ir3_delayslots(struct ir3_instruction *assigner,
 			is_mem(consumer)) {
 		return 6;
 	} else {
-		/* assigner and consumer are both alu */
-		assert(n > 0);
-
 		/* In mergedregs mode, there is an extra 2-cycle penalty when half of
 		 * a full-reg is read as a half-reg or when a half-reg is read as a
 		 * full-reg.
 		 */
 		bool mismatched_half =
-			(assigner->regs[0]->flags & IR3_REG_HALF) !=
-			(consumer->regs[n - 1]->flags & IR3_REG_HALF);
+			(assigner->dsts[0]->flags & IR3_REG_HALF) !=
+			(consumer->srcs[n]->flags & IR3_REG_HALF);
 		unsigned penalty = mismatched_half ? 2 : 0;
 		if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
-			(n == 3)) {
+			(n == 2)) {
 			/* special case, 3rd src to cat3 not required on first cycle */
 			return 1 + penalty;
 		} else {
@ -188,7 +185,7 @@ ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr)
 		unsigned d = 0;

 		if (src->def && src->def->instr->block == block) {
-			d = delay_calc_srcn_prera(block, src->def->instr, instr, i+1);
+			d = delay_calc_srcn_prera(block, src->def->instr, instr, i);
 		}

 		delay = MAX2(delay, d);
@ -221,8 +218,8 @@ static unsigned
 delay_calc_srcn_postra(struct ir3_instruction *assigner, struct ir3_instruction *consumer,
 					   unsigned n, bool soft, bool mergedregs)
 {
-	struct ir3_register *src = consumer->regs[n];
-	struct ir3_register *dst = assigner->regs[0];
+	struct ir3_register *src = consumer->srcs[n];
+	struct ir3_register *dst = assigner->dsts[0];
 	bool mismatched_half =
 		(src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);

@ -322,7 +319,7 @@ delay_calc_postra(struct ir3_block *block,
 				if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
 					continue;

-				unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n+1, soft, mergedregs);
+				unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n, soft, mergedregs);
 				new_delay = MAX2(new_delay, src_delay);
 			}
 		}
--- a/src/freedreno/ir3/ir3_postsched.c
+++ b/src/freedreno/ir3/ir3_postsched.c
@ -450,12 +450,12 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 		if (reg->flags & IR3_REG_RELATIV) {
 			/* mark entire array as read: */
 			for (unsigned j = 0; j < reg->size; j++) {
-				add_reg_dep(state, node, reg, reg->array.base + j, i + 1);
+				add_reg_dep(state, node, reg, reg->array.base + j, i);
 			}
 		} else {
 			assert(reg->wrmask >= 1);
 			u_foreach_bit (b, reg->wrmask) {
-				add_reg_dep(state, node, reg, reg->num + b, i + 1);
+				add_reg_dep(state, node, reg, reg->num + b, i);
 			}
 		}
 	}
@ -466,7 +466,7 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 	/* And then after we update the state for what this instruction
 	 * wrote:
 	 */
-	struct ir3_register *reg = node->instr->regs[0];
+	struct ir3_register *reg = node->instr->dsts[0];
 	if (reg->flags & IR3_REG_RELATIV) {
 		/* mark the entire array as written: */
 		for (unsigned i = 0; i < reg->size; i++) {
@ -694,16 +694,16 @@ is_self_mov(struct ir3_instruction *instr)
 	if (!is_same_type_mov(instr))
 		return false;

-	if (instr->regs[0]->num != instr->regs[1]->num)
+	if (instr->dsts[0]->num != instr->srcs[0]->num)
 		return false;

-	if (instr->regs[0]->flags & IR3_REG_RELATIV)
+	if (instr->dsts[0]->flags & IR3_REG_RELATIV)
 		return false;

 	if (instr->cat1.round != ROUND_ZERO)
 		return false;

-	if (instr->regs[1]->flags & (IR3_REG_CONST | IR3_REG_IMMED |
+	if (instr->srcs[0]->flags & (IR3_REG_CONST | IR3_REG_IMMED |
 			IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS |
 			IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
 		return false;
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@ -381,7 +381,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 			struct ir3_instruction *indirect = ir->a0_users[i];
 			if (!indirect)
 				continue;
-			if (indirect->address->def != instr->regs[0])
+			if (indirect->address->def != instr->dsts[0])
 				continue;
 			ready = could_sched(indirect, instr);
 		}
@ -398,7 +398,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 			struct ir3_instruction *indirect = ir->a1_users[i];
 			if (!indirect)
 				continue;
-			if (indirect->address->def != instr->regs[0])
+			if (indirect->address->def != instr->dsts[0])
 				continue;
 			ready = could_sched(indirect, instr);
 		}
@ -436,7 +436,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	 *
 	 * We could do this by adding each bary.f instruction as
 	 * virtual ssa src for the kill instruction.  But we have
-	 * fixed length instr->regs[].
+	 * fixed length instr->srcs[].
 	 *
 	 * TODO we could handle this by false-deps now, probably.
 	 */
@ -508,7 +508,7 @@ live_effect(struct ir3_instruction *instr)
 	 * then count all it's other components too:
 	 */
 	if (n->collect)
-		new_live *= n->collect->regs_count - 1;
+		new_live *= n->collect->srcs_count;

 	foreach_ssa_src_n (src, n, instr) {
 		if (__is_false_dep(instr, n))
@ -870,13 +870,13 @@ split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
 		/* remap remaining instructions using current addr
 		 * to new addr:
 		 */
-		if (indirect->address->def == (*addr)->regs[0]) {
+		if (indirect->address->def == (*addr)->dsts[0]) {
 			if (!new_addr) {
 				new_addr = split_instr(ctx, *addr);
 				/* original addr is scheduled, but new one isn't: */
 				new_addr->flags &= ~IR3_INSTR_MARK;
 			}
-			indirect->address->def = new_addr->regs[0];
+			indirect->address->def = new_addr->dsts[0];
 			/* don't need to remove old dag edge since old addr is
 			 * already scheduled:
 			 */
@ -919,13 +919,13 @@ split_pred(struct ir3_sched_ctx *ctx)
 		 * TODO is there ever a case when pred isn't first
 		 * (and only) src?
 		 */
-		if (ssa(predicated->regs[1]) == ctx->pred) {
+		if (ssa(predicated->srcs[0]) == ctx->pred) {
 			if (!new_pred) {
 				new_pred = split_instr(ctx, ctx->pred);
 				/* original pred is scheduled, but new one isn't: */
 				new_pred->flags &= ~IR3_INSTR_MARK;
 			}
-			predicated->regs[1]->instr = new_pred;
+			predicated->srcs[0]->instr = new_pred;
 			/* don't need to remove old dag edge since old pred is
 			 * already scheduled:
 			 */
@ -977,13 +977,7 @@ sched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src, i
 	dag_add_edge(&sn->dag, &n->dag, NULL);


-	/* There's a mismatch between the indices foreach_ssa_src_n uses and the
-	 * indices that ir3_delayslots expects, and additionally we don't want to
-	 * call it and get bogus answers on false dependencies.
-	 */
-	unsigned d = 0;
-	if (i < instr->regs_count)
-		d = ir3_delayslots(src, instr, i + 1, true);
+	unsigned d = ir3_delayslots(src, instr, i, true);

 	n->delay = MAX2(n->delay, d);
 }
@ -1028,7 +1022,7 @@ is_output_only(struct ir3_instruction *instr)
 	if (!writes_gpr(instr))
 		return false;

-	if (!(instr->regs[0]->flags & IR3_REG_SSA))
+	if (!(instr->dsts[0]->flags & IR3_REG_SSA))
 		return false;

 	foreach_ssa_use (use, instr)
@ -1240,9 +1234,12 @@ get_array_id(struct ir3_instruction *instr)
 	 * src or dst, ir3_cp should enforce this.
 	 */

-	for (unsigned i = 0; i < instr->regs_count; i++)
-		if (instr->regs[i]->flags & IR3_REG_ARRAY)
-			return instr->regs[i]->array.id;
+	for (unsigned i = 0; i < instr->dsts_count; i++)
+		if (instr->dsts[i]->flags & IR3_REG_ARRAY)
+			return instr->dsts[i]->array.id;
+	for (unsigned i = 0; i < instr->srcs_count; i++)
+		if (instr->srcs[i]->flags & IR3_REG_ARRAY)
+			return instr->srcs[i]->array.id;

 	unreachable("this was unexpected");
 }