mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 17:20:10 +01:00
pan/bi: Do helper termination analysis on clauses
Unlike the dependency analysis for the skip bits, which is a function of the data flow graph, the thread termination analysis depends on the actual sequence of instructions. As such, it must be done after scheduling to be correct in the presence of out-of-order scheduling. Furthermore, it is specified in terms of clauses, not instructions. Reflecting this in our code gives a nice simplification.

As a side effect, this puts extra td flags on subsequent clauses, which matches the DDK's behaviour. (Maybe td is just a hint?)

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10961>
This commit is contained in:
parent
0515c6a3fb
commit
6bf8e960fa
6 changed files with 26 additions and 36 deletions
|
|
@ -119,10 +119,6 @@ bi_block_uses_helpers(bi_block *block)
|
|||
static bool
|
||||
bi_block_terminates_helpers(bi_block *block)
|
||||
{
|
||||
/* Can't terminate if there are no helpers */
|
||||
if (!(block->pass_flags & 1))
|
||||
return false;
|
||||
|
||||
/* Can't terminate if a successor needs helpers */
|
||||
pan_foreach_successor((&block->base), succ) {
|
||||
if (((bi_block *) succ)->pass_flags & 1)
|
||||
|
|
@ -187,19 +183,19 @@ bi_analyze_helper_terminate(bi_context *ctx)
|
|||
_mesa_set_destroy(visited, NULL);
|
||||
_mesa_set_destroy(worklist, NULL);
|
||||
|
||||
/* Finally, set helper_terminate on the last derivative-calculating
|
||||
* instruction in a block that terminates helpers */
|
||||
/* Finally, set td on each clause once neither it nor any later clause in the block requires helpers */
|
||||
bi_foreach_block(ctx, _block) {
|
||||
bi_block *block = (bi_block *) _block;
|
||||
|
||||
if (!bi_block_terminates_helpers(block))
|
||||
continue;
|
||||
/* At the end, there are helpers iff we don't terminate */
|
||||
bool helpers = !bi_block_terminates_helpers(block);
|
||||
|
||||
bi_foreach_instr_in_block_rev(block, I) {
|
||||
if (bi_instr_uses_helpers(I)) {
|
||||
I->tdd = true;
|
||||
break;
|
||||
bi_foreach_clause_in_block_rev(block, clause) {
|
||||
bi_foreach_instr_in_clause_rev(block, clause, I) {
|
||||
helpers |= bi_instr_uses_helpers(I);
|
||||
}
|
||||
|
||||
clause->td = !helpers;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
* bits on the wire (as well as fixup branches) */
|
||||
|
||||
static uint64_t
|
||||
bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd)
|
||||
bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
|
||||
{
|
||||
/* next_dependencies are the union of the dependencies of successors'
|
||||
* dependencies */
|
||||
|
|
@ -43,7 +43,7 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd
|
|||
.flow_control =
|
||||
(next_1 == NULL && next_2 == NULL) ?
|
||||
BIFROST_FLOW_END : clause->flow_control,
|
||||
.terminate_discarded_threads = tdd,
|
||||
.terminate_discarded_threads = clause->td,
|
||||
.next_clause_prefetch = clause->next_clause_prefetch && next_1,
|
||||
.staging_barrier = staging_barrier,
|
||||
.staging_register = clause->staging_register,
|
||||
|
|
@ -625,8 +625,7 @@ bi_pack_format(struct util_dynarray *emission,
|
|||
static void
|
||||
bi_pack_clause(bi_context *ctx, bi_clause *clause,
|
||||
bi_clause *next_1, bi_clause *next_2,
|
||||
struct util_dynarray *emission, gl_shader_stage stage,
|
||||
bool tdd)
|
||||
struct util_dynarray *emission, gl_shader_stage stage)
|
||||
{
|
||||
struct bi_packed_tuple ins[8] = { 0 };
|
||||
|
||||
|
|
@ -644,7 +643,7 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
|
|||
unsigned constant_quads =
|
||||
DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2);
|
||||
|
||||
uint64_t header = bi_pack_header(clause, next_1, next_2, tdd);
|
||||
uint64_t header = bi_pack_header(clause, next_1, next_2);
|
||||
uint64_t ec0 = (clause->constants[0] >> 4);
|
||||
unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0;
|
||||
|
||||
|
|
@ -739,17 +738,7 @@ bi_pack(bi_context *ctx, struct util_dynarray *emission)
|
|||
|
||||
previous_size = emission->size;
|
||||
|
||||
/* Terminate discarded threads after the clause if any
|
||||
* instruction needs threads terminated. Note that this
|
||||
* may be set for CLPER.i32 which is not
|
||||
* message-passing, so we need to check all
|
||||
* instructions */
|
||||
bool tdd = false;
|
||||
|
||||
bi_foreach_instr_in_clause(block, clause, I)
|
||||
tdd |= I->tdd;
|
||||
|
||||
bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage, tdd);
|
||||
bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage);
|
||||
|
||||
if (!is_last)
|
||||
bi_collect_blend_ret_addr(ctx, emission, clause);
|
||||
|
|
|
|||
|
|
@ -103,6 +103,9 @@ bi_print_clause(bi_clause *clause, FILE *fp)
|
|||
if (clause->staging_barrier)
|
||||
fprintf(fp, " osrb");
|
||||
|
||||
if (clause->td)
|
||||
fprintf(fp, " td");
|
||||
|
||||
if (clause->pcrel_idx != ~0)
|
||||
fprintf(fp, " pcrel(%u)", clause->pcrel_idx);
|
||||
|
||||
|
|
|
|||
|
|
@ -168,9 +168,6 @@ bi_print_instr(bi_instr *I, FILE *fp)
|
|||
if (I->table)
|
||||
fprintf(fp, ".%s", bi_table_as_str(I->table));
|
||||
|
||||
if (I->tdd)
|
||||
fprintf(fp, ".tdd");
|
||||
|
||||
switch (I->op) {
|
||||
% for opcode in ops:
|
||||
<%
|
||||
|
|
|
|||
|
|
@ -3585,8 +3585,9 @@ bifrost_compile_shader_nir(nir_shader *nir,
|
|||
bi_print_shader(ctx, stdout);
|
||||
bi_lower_fau(ctx);
|
||||
|
||||
/* Analyze as late as possible before RA/scheduling */
|
||||
bi_analyze_helper_terminate(ctx);
|
||||
/* Analyze before register allocation to avoid false dependencies. The
|
||||
* skip bit is a function of only the data flow graph and is invariant
|
||||
* under valid scheduling. */
|
||||
bi_analyze_helper_requirements(ctx);
|
||||
|
||||
bi_register_allocate(ctx);
|
||||
|
|
@ -3595,6 +3596,10 @@ bifrost_compile_shader_nir(nir_shader *nir,
|
|||
bi_print_shader(ctx, stdout);
|
||||
bi_schedule(ctx);
|
||||
bi_assign_scoreboard(ctx);
|
||||
|
||||
/* Analyze after scheduling since we depend on instruction order. */
|
||||
bi_analyze_helper_terminate(ctx);
|
||||
|
||||
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
|
||||
bi_print_shader(ctx, stdout);
|
||||
|
||||
|
|
|
|||
|
|
@ -316,9 +316,6 @@ typedef struct {
|
|||
* useless double fills */
|
||||
bool no_spill;
|
||||
|
||||
/* Should we terminate discarded threads after executing this instruction? */
|
||||
bool tdd;
|
||||
|
||||
/* Override table, inducing a DTSEL_IMM pair if nonzero */
|
||||
enum bi_table table;
|
||||
|
||||
|
|
@ -520,6 +517,9 @@ typedef struct {
|
|||
/* Unique in a clause */
|
||||
enum bifrost_message_type message_type;
|
||||
bi_instr *message;
|
||||
|
||||
/* Discard helper threads */
|
||||
bool td;
|
||||
} bi_clause;
|
||||
|
||||
typedef struct bi_block {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue