jay/lower_scoreboard: be the sole emitter of SYNC

This gets us closer to something we can schedule and avoids some pointless SYNCs.

Totals from 491 (18.55% of 2647) affected shaders:
Instrs: 602994 -> 602946 (-0.01%)
CodeSize: 9063888 -> 9015904 (-0.53%)

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41398>
This commit is contained in:
Alyssa Rosenzweig 2026-05-06 12:15:49 -04:00 committed by Marge Bot
parent 0885ed10f5
commit c1dc9d3b1a
4 changed files with 30 additions and 19 deletions

View file

@ -681,8 +681,6 @@ jay_emit_memory_barrier(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
{
nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
jay_SYNC(&nj->bld, TGL_SYNC_ALLWR);
if (modes & nir_var_image) {
emit_lsc_fence(nj, intr, BRW_SFID_TGM);
assert(!nj->nir->info.use_lowered_image_to_global && "fix common code");
@ -703,10 +701,8 @@ jay_emit_memory_barrier(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
}
static void
jay_emit_signal_barrier(struct nir_to_jay_state *nj)
jay_emit_signal_barrier(jay_builder *b, struct nir_to_jay_state *nj)
{
jay_builder *b = &nj->bld;
/* Signal barrier / Active threads only (BSpec 72052).
*
* Source 0 is the number of subgroups in [31:24], which comes from the u0.2
@ -729,8 +725,6 @@ jay_emit_signal_barrier(struct nir_to_jay_state *nj)
jay_SEND(b, .sfid = BRW_SFID_MESSAGE_GATEWAY,
.msg_desc = BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG, .srcs = &zipped,
.nr_srcs = 1, .type = JAY_TYPE_U32, .uniform = true);
jay_SYNC(b, TGL_SYNC_BAR);
}
static void
@ -1194,24 +1188,22 @@ jay_emit_intrinsic(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
break;
}
case nir_intrinsic_barrier:
case nir_intrinsic_barrier: {
jay_SCHEDULE_BARRIER(b);
if (nir_intrinsic_memory_scope(intr) != SCOPE_NONE) {
jay_emit_memory_barrier(nj, intr);
}
if (cs) {
if (nir_intrinsic_execution_scope(intr) == SCOPE_WORKGROUP) {
if (jay_workgroup_is_one_subgroup(b, nj->nir)) {
// XXX: when we have a scheduler, jay_SCHEDULE_BARRIER(b);
} else {
jay_emit_signal_barrier(nj);
s->prog_data->cs.uses_barrier = true;
}
}
} else {
// XXX: when we have a scheduler, jay_SCHEDULE_BARRIER(b);
if ((cs && nir_intrinsic_execution_scope(intr) == SCOPE_WORKGROUP) &&
!jay_workgroup_is_one_subgroup(b, nj->nir)) {
jay_emit_signal_barrier(b, nj);
s->prog_data->cs.uses_barrier = true;
}
break;
}
case nir_intrinsic_begin_invocation_interlock:
case nir_intrinsic_end_invocation_interlock:

View file

@ -80,6 +80,16 @@ lower_send_local(jay_function *func, jay_block *block)
}
}
/* Lower ordering barriers */
if (I->op == JAY_OPCODE_SCHEDULE_BARRIER) {
if (busy) {
jay_SYNC(&b, TGL_SYNC_ALLWR);
busy = 0;
}
jay_remove_instruction(I);
}
if (I->op == JAY_OPCODE_SEND && !jay_send_eot(I)) {
unsigned sbid = (roundrobin++) % NUM_TOKENS;
jay_set_send_sbid(I, sbid);
@ -97,6 +107,12 @@ lower_send_local(jay_function *func, jay_block *block)
struct gpr_range src = def_to_gpr(func, I, I->src[s]);
BITSET_SET_COUNT(tokens[sbid].reading, src.base, src.width);
}
/* Barriers are non-EOT gateway messages. Insert the needed SYNC */
if (jay_send_sfid(I) == BRW_SFID_MESSAGE_GATEWAY) {
b.cursor = jay_after_inst(I);
jay_SYNC(&b, TGL_SYNC_BAR);
}
}
}

View file

@ -129,6 +129,7 @@ op('shr', 2, 'u32 u64 u16 s16 s32 s64', Props.CMOD | Props.NEGATE0)
op('quad_swizzle', 1, 'u1 u32', 0, ['enum jay_quad_swizzle swizzle'])
op('sync', 0, None, Props.NO_DEST, ['enum tgl_sync_function op'])
op('schedule_barrier', 0, None, Props.NO_DEST)
for n in ['brd', 'illegal', 'goto', 'join', 'if', 'else',
'endif', 'while', 'break', 'cont', 'call', 'calla', 'jmpi', 'ret',

View file

@ -250,6 +250,8 @@ validate_inst(struct validate_state *validate, jay_inst *I)
if (I->op == JAY_OPCODE_SEL) {
CHECK(jay_is_flag(I->src[2]) && "SEL src[2] (selector) must be a flag");
} else if (I->op == JAY_OPCODE_SYNC) {
CHECK(validate->post_ra && "SYNC does not exist while scheduling");
}
}