jay: Implement halt
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

HALT must always be issued in pairs. The first HALT masks off the active
channels, and the second one re-enables those masked-off
channels.

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42067>
This commit is contained in:
Sagar Ghuge 2026-06-06 10:00:55 -07:00 committed by Marge Bot
parent 36f3175187
commit 3da4653d46
3 changed files with 43 additions and 7 deletions

View file

@ -88,6 +88,7 @@ struct nir_to_jay_state {
jay_block *break_block;
unsigned indent;
bool needs_final_halt;
/* We cache ballot(true), ctz(ballot(true)), and 4*ctz(ballot(true)) within a
* block. If we had competent backend CSE - or emitted uniformize in NIR and
@ -777,8 +778,22 @@ scalars_equal(nir_scalar a, nir_scalar b)
}
static void
jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
jay_emit_halt_target(struct nir_to_jay_state *nj)
{
/* Emit the closing HALT_TARGET for a pending HALT, if one is needed.
 *
 * This final halt re-enables the channels which got masked off by the
 * first HALT. It is a no-op unless nj->needs_final_halt is set.
 */
if (nj->needs_final_halt) {
/* Clear the flag before emitting so that a later call does not emit a
 * second HALT_TARGET. This avoids re-emitting the halt after the EOT
 * send.
 */
nj->needs_final_halt = false;
jay_HALT_TARGET(&nj->bld);
}
}
static void
jay_emit_fb_write(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
{
jay_builder *b = &nj->bld;
const struct intel_device_info *devinfo = b->shader->devinfo;
jay_def colour = nj_src(intr->src[0]);
jay_def dual_colour = jay_null();
@ -790,6 +805,8 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
const int target = MAX2(((signed) nir_intrinsic_target(intr)), 0);
const bool last = !nir_instr_next(&intr->instr);
jay_emit_halt_target(nj);
/* The hardware freaks out if we give it an omask without multisampling. */
if (!b->shader->prog_data->fs.uses_omask) {
omask = jay_null();
@ -1449,7 +1466,7 @@ jay_emit_intrinsic(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr)
case nir_intrinsic_store_render_target_intel:
assert(nj->nir->info.stage == MESA_SHADER_FRAGMENT);
jay_emit_fb_write(b, intr);
jay_emit_fb_write(nj, intr);
break;
case nir_intrinsic_shader_clock:
@ -2380,8 +2397,8 @@ jay_emit_jump(struct nir_to_jay_state *nj, nir_jump_instr *instr)
jay_BREAK(&nj->bld);
break;
case nir_jump_halt:
// TODO: Do we want a predicated EOT here, or a jump to the end?
assert(!"TODO: implement HALT");
nj->needs_final_halt = true;
jay_HALT(&nj->bld);
break;
case nir_jump_return:
/* Should be lowered */
@ -2648,6 +2665,8 @@ jay_emit_eot(struct nir_to_jay_state *nj)
{
jay_builder *b = &nj->bld;
jay_emit_halt_target(nj);
if (mesa_shader_stage_is_compute(nj->nir->info.stage)) {
jay_def u0 = nj->payload.u0;

View file

@ -134,7 +134,7 @@ op('schedule_barrier', 0, None, Props.NO_DEST)
for n in ['brd', 'illegal', 'goto', 'join', 'if', 'else',
'endif', 'while', 'break', 'cont', 'call', 'calla', 'jmpi', 'ret',
'loop_once']:
'loop_once', 'halt', 'halt_target']:
op(n, 0, None, Props.NO_DEST)
op('send', 4, None, Props.SIDE_EFFECTS, [

View file

@ -61,6 +61,9 @@ struct jay_codegen {
/* struct intel_shader_reloc */
struct util_dynarray relocs;
/* Index of the final HALT instruction, or -1 if none has been emitted yet. */
int final_halt_offset;
};
static inline gen_operand
@ -264,6 +267,7 @@ static const struct {
OP(FBH, FBH, 1),
OP(FBL, FBL, 1),
OP(FRC, FRC, 1),
OP(HALT, HALT, 0),
OP(IF, IF, 0),
OP(LANE_ID_8, MOV, 0),
OP(LZD, LZD, 1),
@ -575,6 +579,19 @@ emit(struct jay_codegen *jc,
}
break;
case JAY_OPCODE_HALT_TARGET:
/* HALT temporarily disables channels, and the same instruction is used
* to re-enable them: once all channels are disabled, then they are
* re-enabled again immediately.
*
* So put a HALT right before the "epilogue" of the shader to make sure
* all channels get HALTed, so that this last HALT will re-enable them
* again.
*/
jc->final_halt_offset = jc->num_insts - 1;
gen->opcode = GEN_OP_HALT;
break;
default:
break;
}
@ -632,6 +649,7 @@ jay_to_binary(jay_shader *s,
.insts = rzalloc_array(mem_ctx, gen_inst, total_gen_insts),
.insts_cap = total_gen_insts,
.output = rzalloc_size(bin, output_capacity),
.final_halt_offset = -1,
};
util_dynarray_init(&jc.loop_stack, mem_ctx);
util_dynarray_init(&jc.relocs, bin);
@ -672,8 +690,7 @@ jay_to_binary(jay_shader *s,
gen->dst.region.hstride = 1;
}
int final_halt_offset = -1 /* TODO */;
gen_finish_structured_cf(jc.insts, jc.num_insts, final_halt_offset);
gen_finish_structured_cf(jc.insts, jc.num_insts, jc.final_halt_offset);
const unsigned num_relocs =
util_dynarray_num_elements(&jc.relocs, struct intel_shader_reloc);