freedreno/ir3: move block-scheduling into legalize

We want to do this only once.  If we have a post-RA sched pass, then we
don't want to do it pre-RA.  Since legalize is where we resolve the
branches/jumps, we might as well move this into legalize.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3569>
This commit is contained in:
Rob Clark 2019-12-18 14:04:36 -08:00 committed by Marge Bot
parent 093c94456b
commit 304b50c9f8
4 changed files with 45 additions and 49 deletions

View file

@@ -1116,8 +1116,6 @@ void ir3_print_instr(struct ir3_instruction *instr);
/* delay calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n);
unsigned ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
unsigned maxd, bool pred);
unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
bool soft, bool pred);
void ir3_remove_nops(struct ir3 *ir);
@@ -1359,7 +1357,7 @@ ir3_##name(struct ir3_block *block, \
#define INSTR4(name) __INSTR4(0, name, OPC_##name)
/* cat0 instructions: */
INSTR0(BR)
INSTR1(BR)
INSTR0(JUMP)
INSTR1(KILL)
INSTR0(END)

View file

@@ -126,8 +126,8 @@ count_instruction(struct ir3_instruction *n)
* find the worst case (shortest) distance (only possible after
* individual blocks are all scheduled)
*/
unsigned
ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
static unsigned
distance(struct ir3_block *block, struct ir3_instruction *instr,
unsigned maxd, bool pred)
{
unsigned d = 0;
@@ -162,7 +162,7 @@ ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
struct ir3_block *pred = (struct ir3_block *)entry->key;
unsigned n;
n = ir3_distance(pred, instr, min, pred);
n = distance(pred, instr, min, pred);
min = MIN2(min, n);
}
@@ -204,7 +204,7 @@ delay_calc_srcn(struct ir3_block *block,
} else {
delay = ir3_delayslots(assigner, consumer, srcn);
}
delay -= ir3_distance(block, assigner, delay, pred);
delay -= distance(block, assigner, delay, pred);
}
return delay;

View file

@@ -553,6 +553,45 @@ mark_xvergence_points(struct ir3 *ir)
}
}
/* Emit the branch/jump instructions providing flow control between
 * blocks.  At this stage every block naively ends in an explicit
 * branch or jump, even when the successor immediately follows (ie. a
 * fall-thru would suffice); resolve_jumps() strips those out later.
 *
 * TODO what ensures that the last write to p0.x in a block is the
 * branch condition?  Have we been getting lucky all this time?
 */
static void
block_sched(struct ir3 *ir)
{
	foreach_block (block, &ir->block_list) {
		if (block->successors[1]) {
			/* Two successors means if/else: emit a conditional
			 * branch to each of "else" and "then":
			 */
			struct ir3_instruction *branch;

			debug_assert(block->condition);

			/* Emit the inverted "else" branch first, since the
			 * "then" block should frequently/always end up being
			 * a fall-thru:
			 */
			branch = ir3_BR(block, block->condition, 0);
			branch->cat0.inv = true;
			branch->cat0.target = block->successors[1];

			/* And then the "then" branch: */
			branch = ir3_BR(block, block->condition, 0);
			branch->cat0.target = block->successors[0];
		} else if (block->successors[0]) {
			/* Single successor: unconditional jump to next block: */
			struct ir3_instruction *jump = ir3_JUMP(block);
			jump->cat0.target = block->successors[0];
		}
	}
}
/* Insert nop's required to make this a legal/valid shader program: */
static void
nop_sched(struct ir3 *ir)
@@ -629,6 +668,7 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
*max_bary = ctx->max_bary;
block_sched(ir);
nop_sched(ir);
do {

View file

@@ -761,48 +761,6 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
}
}
}
/* And lastly, insert branch/jump instructions to take us to
* the next block. Later we'll strip back out the branches
* that simply jump to next instruction.
*/
if (block->successors[1]) {
/* if/else, conditional branches to "then" or "else": */
struct ir3_instruction *br;
debug_assert(ctx->pred);
debug_assert(block->condition);
/* create "else" branch first (since "then" block should
* frequently/always end up being a fall-thru):
*/
br = ir3_BR(block);
br->cat0.inv = true;
br->cat0.target = block->successors[1];
/* NOTE: we have to hard code delay of 6 above, since
* we want to insert the nop's before constructing the
* branch. Throw in an assert so we notice if this
* ever breaks on future generation:
*/
debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);
br = ir3_BR(block);
br->cat0.target = block->successors[0];
} else if (block->successors[0]) {
/* otherwise unconditional jump to next block: */
struct ir3_instruction *jmp;
jmp = ir3_JUMP(block);
jmp->cat0.target = block->successors[0];
}
/* NOTE: if we kept track of the predecessors, we could do a better
* job w/ (jp) flags.. every node w/ > predecessor is a join point.
* Note that as we eliminate blocks which contain only an unconditional
* jump we probably need to propagate (jp) flag..
*/
}
int ir3_sched(struct ir3 *ir)