ir3/sched: Don't penalize uses of already-waited tex/SFU

Once we insert a use of a given tex or SFU instruction, we must wait for that tex/SFU instruction (as well as all earlier ones) to complete, so we shouldn't penalize further uses, even if a subsequent tex/SFU instruction gets scheduled after the first use. This especially matters after the next commit, when we start forcibly breaking up long sequences of texture instructions: if we schedule a group of 8 texture instructions, then we want to schedule the uses of those instructions in parallel with the next 8 texture instructions to reduce register pressure.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7571>
2026-01-02 13:50:09 +01:00 · 2020-11-11 15:04:22 +01:00 · 2020-11-11 15:04:22 +01:00 · 7821e5a3f8
commit 7821e5a3f8
parent 5362adf68d
1 changed files with 65 additions and 4 deletions
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@ -99,6 +99,14 @@ struct ir3_sched_ctx {

 	int sfu_delay;
 	int tex_delay;
+
+	/* We number the scheduled tex/SFU instructions in order, and track the
+	 * index of the first one not yet waited on, so we can know which
+	 * instructions are still outstanding (and therefore would require us to
+	 * wait for all outstanding instructions before scheduling a use).
+	 */
+	int tex_index, first_outstanding_tex_index;
+	int sfu_index, first_outstanding_sfu_index;
 };

 struct ir3_sched_node {
@ -108,6 +116,9 @@ struct ir3_sched_node {
 	unsigned delay;
 	unsigned max_delay;

+	unsigned tex_index;
+	unsigned sfu_index;
+
 	/* For instructions that are a meta:collect src, once we schedule
 	 * the first src of the collect, the entire vecN is live (at least
 	 * from the PoV of the first RA pass.. the 2nd scalar pass can fill
@ -153,6 +164,50 @@ static bool is_scheduled(struct ir3_instruction *instr)
 	return !!(instr->flags & IR3_INSTR_MARK);
 }

+/* Variant of check_src_cond() whose predicate also receives an
+ * ir3_sched_ctx.
+ *
+ * Returns true as soon as cond(src, ctx) holds for some SSA source of
+ * instr, and false if it holds for none.  cond is never applied to instr
+ * itself, nor to a source that is a meta:split/collect; for those, their
+ * own sources are searched recursively instead.  ctx is only forwarded
+ * to cond.
+ */
+static bool
+sched_check_src_cond(struct ir3_instruction *instr,
+					 bool (*cond)(struct ir3_instruction *, struct ir3_sched_ctx *),
+					 struct ir3_sched_ctx *ctx)
+{
+	foreach_ssa_src (src, instr) {
+		/* meta:split/collect aren't real instructions, the thing that
+		 * we actually care about is *their* srcs
+		 */
+		if ((src->opc == OPC_META_SPLIT) || (src->opc == OPC_META_COLLECT)) {
+			if (sched_check_src_cond(src, cond, ctx))
+				return true;
+		} else {
+			if (cond(src, ctx))
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/* Is this a prefetch or tex that hasn't been waited on yet?
+ *
+ * Returns false for anything that is not a tex/prefetch.  Otherwise the
+ * instruction's ir3_sched_node (read from instr->data) carries the
+ * tex_index that schedule() assigned from ctx->tex_index++, and
+ * schedule() advances ctx->first_outstanding_tex_index to ctx->tex_index
+ * whenever it schedules a use of an outstanding tex.  An index at or
+ * past first_outstanding_tex_index is therefore still outstanding.
+ *
+ * NOTE(review): n->tex_index is unsigned while the ctx field is int, so
+ * the comparison is done as unsigned; this relies on the ctx field never
+ * being negative (it is reset to 0 in sched_block()) -- confirm.
+ */
+
+static bool
+is_outstanding_tex_or_prefetch(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx)
+{
+	if (!is_tex_or_prefetch(instr))
+		return false;
+
+	struct ir3_sched_node *n = instr->data;
+	return n->tex_index >= ctx->first_outstanding_tex_index;
+}
+
+/* Is this an SFU instruction that hasn't been waited on yet?
+ *
+ * Returns false for anything that is not an SFU instruction.  Otherwise
+ * the instruction's ir3_sched_node (read from instr->data) carries the
+ * sfu_index that schedule() assigned from ctx->sfu_index++, and
+ * schedule() advances ctx->first_outstanding_sfu_index to ctx->sfu_index
+ * whenever it schedules a use of an outstanding SFU instruction.  An
+ * index at or past first_outstanding_sfu_index is therefore still
+ * outstanding.
+ *
+ * NOTE(review): n->sfu_index is unsigned while the ctx field is int, so
+ * the comparison is done as unsigned; this relies on the ctx field never
+ * being negative (it is reset to 0 in sched_block()) -- confirm.
+ */
+static bool
+is_outstanding_sfu(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx)
+{
+	if (!is_sfu(instr))
+		return false;
+
+	struct ir3_sched_node *n = instr->data;
+	return n->sfu_index >= ctx->first_outstanding_sfu_index;
+}
+
 static void
 schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 {
@ -210,8 +265,10 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)

 	if (is_sfu(instr)) {
 		ctx->sfu_delay = 8;
-	} else if (check_src_cond(instr, is_sfu)) {
+		n->sfu_index = ctx->sfu_index++;
+	} else if (sched_check_src_cond(instr, is_outstanding_sfu, ctx)) {
 		ctx->sfu_delay = 0;
+		ctx->first_outstanding_sfu_index = ctx->sfu_index;
 	} else if (ctx->sfu_delay > 0) {
 		ctx->sfu_delay--;
 	}
@ -225,8 +282,10 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 		ctx->tex_delay = 10;
 		assert(ctx->remaining_tex > 0);
 		ctx->remaining_tex--;
-	} else if (check_src_cond(instr, is_tex_or_prefetch)) {
+		n->tex_index = ctx->tex_index++;
+	} else if (sched_check_src_cond(instr, is_outstanding_tex_or_prefetch, ctx)) {
 		ctx->tex_delay = 0;
+		ctx->first_outstanding_tex_index = ctx->tex_index;
 	} else if (ctx->tex_delay > 0) {
 		ctx->tex_delay--;
 	}
@ -443,7 +502,7 @@ static bool
 would_sync(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 {
 	if (ctx->sfu_delay) {
-		if (check_src_cond(instr, is_sfu))
+		if (sched_check_src_cond(instr, is_outstanding_sfu, ctx))
 			return true;
 	}

@ -453,7 +512,7 @@ would_sync(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 	 * fetches
 	 */
 	if (ctx->tex_delay && ctx->remaining_tex) {
-		if (check_src_cond(instr, is_tex_or_prefetch))
+		if (sched_check_src_cond(instr, is_outstanding_tex_or_prefetch, ctx))
 			return true;
 	}

@ -986,6 +1045,8 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	ctx->pred = NULL;
 	ctx->tex_delay = 0;
 	ctx->sfu_delay = 0;
+	ctx->tex_index = ctx->first_outstanding_tex_index = 0;
+	ctx->sfu_index = ctx->first_outstanding_sfu_index = 0;

 	/* move all instructions to the unscheduled list, and
 	 * empty the block's instruction list (to which we will