pan/bi: Do helper termination analysis on clauses

Unlike the dependency analysis for the skip bits which is a function of
the data flow graph, the thread termination analysis is dependent on the
actual sequence of instructions. As such, it must be done after
scheduling to be correct in the presence of out-of-order scheduling.

Furthermore it's specified in terms of clauses, not instructions.
Reflecting this in our code gets a nice simplification. As a side effect
this puts extra td flags on subsequent clauses, which matches the DDK's
behaviour. (Maybe td is just a hint?)

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10961>
This commit is contained in:
Alyssa Rosenzweig 2021-07-12 11:11:27 -04:00 committed by Marge Bot
parent 0515c6a3fb
commit 6bf8e960fa
6 changed files with 26 additions and 36 deletions

View file

@ -119,10 +119,6 @@ bi_block_uses_helpers(bi_block *block)
static bool
bi_block_terminates_helpers(bi_block *block)
{
/* Can't terminate if there are no helpers */
if (!(block->pass_flags & 1))
return false;
/* Can't terminate if a successor needs helpers */
pan_foreach_successor((&block->base), succ) {
if (((bi_block *) succ)->pass_flags & 1)
@ -187,19 +183,19 @@ bi_analyze_helper_terminate(bi_context *ctx)
_mesa_set_destroy(visited, NULL);
_mesa_set_destroy(worklist, NULL);
/* Finally, set helper_terminate on the last derivative-calculating
* instruction in a block that terminates helpers */
/* Finally, mark clauses requiring helpers */
bi_foreach_block(ctx, _block) {
bi_block *block = (bi_block *) _block;
if (!bi_block_terminates_helpers(block))
continue;
/* At the end, there are helpers iff we don't terminate */
bool helpers = !bi_block_terminates_helpers(block);
bi_foreach_instr_in_block_rev(block, I) {
if (bi_instr_uses_helpers(I)) {
I->tdd = true;
break;
bi_foreach_clause_in_block_rev(block, clause) {
bi_foreach_instr_in_clause_rev(block, clause, I) {
helpers |= bi_instr_uses_helpers(I);
}
clause->td = !helpers;
}
}
}

View file

@ -28,7 +28,7 @@
* bits on the wire (as well as fixup branches) */
static uint64_t
bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd)
bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
{
/* next_dependencies are the union of the dependencies of successors'
* dependencies */
@ -43,7 +43,7 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd
.flow_control =
(next_1 == NULL && next_2 == NULL) ?
BIFROST_FLOW_END : clause->flow_control,
.terminate_discarded_threads = tdd,
.terminate_discarded_threads = clause->td,
.next_clause_prefetch = clause->next_clause_prefetch && next_1,
.staging_barrier = staging_barrier,
.staging_register = clause->staging_register,
@ -625,8 +625,7 @@ bi_pack_format(struct util_dynarray *emission,
static void
bi_pack_clause(bi_context *ctx, bi_clause *clause,
bi_clause *next_1, bi_clause *next_2,
struct util_dynarray *emission, gl_shader_stage stage,
bool tdd)
struct util_dynarray *emission, gl_shader_stage stage)
{
struct bi_packed_tuple ins[8] = { 0 };
@ -644,7 +643,7 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
unsigned constant_quads =
DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2);
uint64_t header = bi_pack_header(clause, next_1, next_2, tdd);
uint64_t header = bi_pack_header(clause, next_1, next_2);
uint64_t ec0 = (clause->constants[0] >> 4);
unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0;
@ -739,17 +738,7 @@ bi_pack(bi_context *ctx, struct util_dynarray *emission)
previous_size = emission->size;
/* Terminate discarded threads after the clause if any
* instruction needs threads terminated. Note that this
* may be set for CLPER.i32 which is not
* message-passing, so we need to check all
* instructions */
bool tdd = false;
bi_foreach_instr_in_clause(block, clause, I)
tdd |= I->tdd;
bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage, tdd);
bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage);
if (!is_last)
bi_collect_blend_ret_addr(ctx, emission, clause);

View file

@ -103,6 +103,9 @@ bi_print_clause(bi_clause *clause, FILE *fp)
if (clause->staging_barrier)
fprintf(fp, " osrb");
if (clause->td)
fprintf(fp, " td");
if (clause->pcrel_idx != ~0)
fprintf(fp, " pcrel(%u)", clause->pcrel_idx);

View file

@ -168,9 +168,6 @@ bi_print_instr(bi_instr *I, FILE *fp)
if (I->table)
fprintf(fp, ".%s", bi_table_as_str(I->table));
if (I->tdd)
fprintf(fp, ".tdd");
switch (I->op) {
% for opcode in ops:
<%

View file

@ -3585,8 +3585,9 @@ bifrost_compile_shader_nir(nir_shader *nir,
bi_print_shader(ctx, stdout);
bi_lower_fau(ctx);
/* Analyze as late as possible before RA/scheduling */
bi_analyze_helper_terminate(ctx);
/* Analyze before register allocation to avoid false dependencies. The
* skip bit is a function of only the data flow graph and is invariant
* under valid scheduling. */
bi_analyze_helper_requirements(ctx);
bi_register_allocate(ctx);
@ -3595,6 +3596,10 @@ bifrost_compile_shader_nir(nir_shader *nir,
bi_print_shader(ctx, stdout);
bi_schedule(ctx);
bi_assign_scoreboard(ctx);
/* Analyze after scheduling since we depend on instruction order. */
bi_analyze_helper_terminate(ctx);
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
bi_print_shader(ctx, stdout);

View file

@ -316,9 +316,6 @@ typedef struct {
* useless double fills */
bool no_spill;
/* Should we terminate discarded threads after executing this instruction? */
bool tdd;
/* Override table, inducing a DTSEL_IMM pair if nonzero */
enum bi_table table;
@ -520,6 +517,9 @@ typedef struct {
/* Unique in a clause */
enum bifrost_message_type message_type;
bi_instr *message;
/* Discard helper threads */
bool td;
} bi_clause;
typedef struct bi_block {