diff --git a/src/panfrost/bifrost/bi_helper_invocations.c b/src/panfrost/bifrost/bi_helper_invocations.c index 564027187ea..a8d2c61c1e2 100644 --- a/src/panfrost/bifrost/bi_helper_invocations.c +++ b/src/panfrost/bifrost/bi_helper_invocations.c @@ -119,10 +119,6 @@ bi_block_uses_helpers(bi_block *block) static bool bi_block_terminates_helpers(bi_block *block) { - /* Can't terminate if there are no helpers */ - if (!(block->pass_flags & 1)) - return false; - /* Can't terminate if a successor needs helpers */ pan_foreach_successor((&block->base), succ) { if (((bi_block *) succ)->pass_flags & 1) @@ -187,19 +183,19 @@ bi_analyze_helper_terminate(bi_context *ctx) _mesa_set_destroy(visited, NULL); _mesa_set_destroy(worklist, NULL); - /* Finally, set helper_terminate on the last derivative-calculating - * instruction in a block that terminates helpers */ + /* Finally, mark clauses requiring helpers */ bi_foreach_block(ctx, _block) { bi_block *block = (bi_block *) _block; - if (!bi_block_terminates_helpers(block)) - continue; + /* At the end, there are helpers iff we don't terminate */ + bool helpers = !bi_block_terminates_helpers(block); - bi_foreach_instr_in_block_rev(block, I) { - if (bi_instr_uses_helpers(I)) { - I->tdd = true; - break; + bi_foreach_clause_in_block_rev(block, clause) { + bi_foreach_instr_in_clause_rev(block, clause, I) { + helpers |= bi_instr_uses_helpers(I); } + + clause->td = !helpers; } } } diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c index 491d71bebec..a9a1a415fd4 100644 --- a/src/panfrost/bifrost/bi_pack.c +++ b/src/panfrost/bifrost/bi_pack.c @@ -28,7 +28,7 @@ * bits on the wire (as well as fixup branches) */ static uint64_t -bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd) +bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2) { /* next_dependencies are the union of the dependencies of successors' * dependencies */ @@ -43,7 +43,7 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd .flow_control = (next_1 == NULL && next_2 == NULL) ? BIFROST_FLOW_END : clause->flow_control, - .terminate_discarded_threads = tdd, + .terminate_discarded_threads = clause->td, .next_clause_prefetch = clause->next_clause_prefetch && next_1, .staging_barrier = staging_barrier, .staging_register = clause->staging_register, @@ -625,8 +625,7 @@ bi_pack_format(struct util_dynarray *emission, static void bi_pack_clause(bi_context *ctx, bi_clause *clause, bi_clause *next_1, bi_clause *next_2, - struct util_dynarray *emission, gl_shader_stage stage, - bool tdd) + struct util_dynarray *emission, gl_shader_stage stage) { struct bi_packed_tuple ins[8] = { 0 }; @@ -644,7 +643,7 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause, unsigned constant_quads = DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2); - uint64_t header = bi_pack_header(clause, next_1, next_2, tdd); + uint64_t header = bi_pack_header(clause, next_1, next_2); uint64_t ec0 = (clause->constants[0] >> 4); unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0; @@ -739,17 +738,7 @@ bi_pack(bi_context *ctx, struct util_dynarray *emission) previous_size = emission->size; - /* Terminate discarded threads after the clause if any - * instruction needs threads terminated. Note that this - * may be set for CLPER.i32 which is not - * message-passing, so we need to check all - * instructions */ - bool tdd = false; - - bi_foreach_instr_in_clause(block, clause, I) - tdd |= I->tdd; - - bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage, tdd); + bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage); if (!is_last) bi_collect_blend_ret_addr(ctx, emission, clause); diff --git a/src/panfrost/bifrost/bi_print.c b/src/panfrost/bifrost/bi_print.c index 295ff8efcab..aea95cc071d 100644 --- a/src/panfrost/bifrost/bi_print.c +++ b/src/panfrost/bifrost/bi_print.c @@ -103,6 +103,9 @@ bi_print_clause(bi_clause *clause, FILE *fp) if (clause->staging_barrier) fprintf(fp, " osrb"); + if (clause->td) + fprintf(fp, " td"); + if (clause->pcrel_idx != ~0) fprintf(fp, " pcrel(%u)", clause->pcrel_idx); diff --git a/src/panfrost/bifrost/bi_printer.c.py b/src/panfrost/bifrost/bi_printer.c.py index b9450dbed0c..306124b5811 100644 --- a/src/panfrost/bifrost/bi_printer.c.py +++ b/src/panfrost/bifrost/bi_printer.c.py @@ -168,9 +168,6 @@ bi_print_instr(bi_instr *I, FILE *fp) if (I->table) fprintf(fp, ".%s", bi_table_as_str(I->table)); - if (I->tdd) - fprintf(fp, ".tdd"); - switch (I->op) { % for opcode in ops: <% diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 5d5ddfae855..3b577944a77 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -3585,8 +3585,9 @@ bifrost_compile_shader_nir(nir_shader *nir, bi_print_shader(ctx, stdout); bi_lower_fau(ctx); - /* Analyze as late as possible before RA/scheduling */ - bi_analyze_helper_terminate(ctx); + /* Analyze before register allocation to avoid false dependencies. The + * skip bit is a function of only the data flow graph and is invariant + * under valid scheduling. */ bi_analyze_helper_requirements(ctx); bi_register_allocate(ctx); @@ -3595,6 +3596,10 @@ bifrost_compile_shader_nir(nir_shader *nir, bi_print_shader(ctx, stdout); bi_schedule(ctx); bi_assign_scoreboard(ctx); + + /* Analyze after scheduling since we depend on instruction order. */ + bi_analyze_helper_terminate(ctx); + if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) bi_print_shader(ctx, stdout); diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index fa20643a9d5..6966102e8aa 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -316,9 +316,6 @@ typedef struct { * useless double fills */ bool no_spill; - /* Should we terminate discarded threads after executing this instruction? */ - bool tdd; - /* Override table, inducing a DTSEL_IMM pair if nonzero */ enum bi_table table; @@ -520,6 +517,9 @@ typedef struct { /* Unique in a clause */ enum bifrost_message_type message_type; bi_instr *message; + + /* Discard helper threads */ + bool td; } bi_clause; typedef struct bi_block {