diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index f96fcb76276..46678fe75de 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -99,6 +99,14 @@ struct ir3_sched_ctx { int sfu_delay; int tex_delay; + + /* We order the scheduled tex/SFU instructions, and keep track of the + * first index that has not been waited on yet, so we can know which + * instructions are still outstanding (and therefore would require us to + * wait for all outstanding instructions before scheduling a use). + */ + int tex_index, first_outstanding_tex_index; + int sfu_index, first_outstanding_sfu_index; }; struct ir3_sched_node { @@ -108,6 +116,9 @@ struct ir3_sched_node { unsigned delay; unsigned max_delay; + unsigned tex_index; + unsigned sfu_index; + /* For instructions that are a meta:collect src, once we schedule * the first src of the collect, the entire vecN is live (at least * from the PoV of the first RA pass.. the 2nd scalar pass can fill @@ -153,6 +164,50 @@ static bool is_scheduled(struct ir3_instruction *instr) return !!(instr->flags & IR3_INSTR_MARK); } +/* Like check_src_cond(), but cond is also passed the ir3_sched_ctx. */ +static bool +sched_check_src_cond(struct ir3_instruction *instr, + bool (*cond)(struct ir3_instruction *, struct ir3_sched_ctx *), + struct ir3_sched_ctx *ctx) +{ + foreach_ssa_src (src, instr) { + /* meta:split/collect aren't real instructions, the thing that + * we actually care about is *their* srcs + */ + if ((src->opc == OPC_META_SPLIT) || (src->opc == OPC_META_COLLECT)) { + if (sched_check_src_cond(src, cond, ctx)) + return true; + } else { + if (cond(src, ctx)) + return true; + } + } + + return false; +} + +/* Is this a tex or prefetch whose tex_index has not been waited on yet? NOTE(review): instr->data is read as an ir3_sched_node without any check. 
*/ + +static bool +is_outstanding_tex_or_prefetch(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx) +{ + if (!is_tex_or_prefetch(instr)) + return false; + + struct ir3_sched_node *n = instr->data; + return n->tex_index >= ctx->first_outstanding_tex_index; /* index not yet waited on */ +} + +static bool +is_outstanding_sfu(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx) +{ + if (!is_sfu(instr)) + return false; + + struct ir3_sched_node *n = instr->data; + return n->sfu_index >= ctx->first_outstanding_sfu_index; /* index not yet waited on */ +} + static void schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { @@ -210,8 +265,10 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) if (is_sfu(instr)) { ctx->sfu_delay = 8; - } else if (check_src_cond(instr, is_sfu)) { + n->sfu_index = ctx->sfu_index++; + } else if (sched_check_src_cond(instr, is_outstanding_sfu, ctx)) { ctx->sfu_delay = 0; + ctx->first_outstanding_sfu_index = ctx->sfu_index; /* no already-indexed SFU counts as outstanding after this */ } else if (ctx->sfu_delay > 0) { ctx->sfu_delay--; } @@ -225,8 +282,10 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) ctx->tex_delay = 10; assert(ctx->remaining_tex > 0); ctx->remaining_tex--; - } else if (check_src_cond(instr, is_tex_or_prefetch)) { + n->tex_index = ctx->tex_index++; + } else if (sched_check_src_cond(instr, is_outstanding_tex_or_prefetch, ctx)) { ctx->tex_delay = 0; + ctx->first_outstanding_tex_index = ctx->tex_index; /* no already-indexed tex counts as outstanding after this */ } else if (ctx->tex_delay > 0) { ctx->tex_delay--; } @@ -443,7 +502,7 @@ static bool would_sync(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { if (ctx->sfu_delay) { - if (check_src_cond(instr, is_sfu)) + if (sched_check_src_cond(instr, is_outstanding_sfu, ctx)) return true; } @@ -453,7 +512,7 @@ would_sync(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) * fetches */ if (ctx->tex_delay && 
ctx->remaining_tex) { - if (check_src_cond(instr, is_tex_or_prefetch)) + if (sched_check_src_cond(instr, is_outstanding_tex_or_prefetch, ctx)) return true; } @@ -986,6 +1045,8 @@ 
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) ctx->pred = NULL; ctx->tex_delay = 0; ctx->sfu_delay = 0; + ctx->tex_index = ctx->first_outstanding_tex_index = 0; + ctx->sfu_index = ctx->first_outstanding_sfu_index = 0; /* move all instructions to the unscheduled list, and * empty the block's instruction list (to which we will