From 3da4653d46f5efa704e59b472bc6e61ac196ce1b Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Sat, 6 Jun 2026 10:00:55 -0700 Subject: [PATCH] jay: Implement halt HALT instructions must always be issued in pairs. The first HALT masks off the active channels, and the second one re-enables the channels that were masked off. Signed-off-by: Sagar Ghuge Part-of: --- src/intel/compiler/jay/jay_from_nir.c | 27 ++++++++++++++++++++++---- src/intel/compiler/jay/jay_opcodes.py | 2 +- src/intel/compiler/jay/jay_to_binary.c | 21 ++++++++++++++++++-- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index 8a04a571f73..de5d144a914 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -88,6 +88,7 @@ struct nir_to_jay_state { jay_block *break_block; unsigned indent; + bool needs_final_halt; /* We cache ballot(true), ctz(ballot(true)), and 4*ctz(ballot(true)) within a * block. If we had competent backend CSE - or emitted uniformize in NIR and @@ -777,8 +778,22 @@ scalars_equal(nir_scalar a, nir_scalar b) } static void -jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr) +jay_emit_halt_target(struct nir_to_jay_state *nj) { + /* This final halt will re-enable the channels which got masked off by first * HALT. 
+ */ + if (nj->needs_final_halt) { + /* This avoids re-emitting the halt after EOT send */ + nj->needs_final_halt = false; + jay_HALT_TARGET(&nj->bld); + } +} + +static void +jay_emit_fb_write(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr) +{ + jay_builder *b = &nj->bld; const struct intel_device_info *devinfo = b->shader->devinfo; jay_def colour = nj_src(intr->src[0]); jay_def dual_colour = jay_null(); @@ -790,6 +805,8 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr) const int target = MAX2(((signed) nir_intrinsic_target(intr)), 0); const bool last = !nir_instr_next(&intr->instr); + jay_emit_halt_target(nj); + /* The hardware freaks out if we give it an omask without multisampling. */ if (!b->shader->prog_data->fs.uses_omask) { omask = jay_null(); @@ -1449,7 +1466,7 @@ jay_emit_intrinsic(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr) case nir_intrinsic_store_render_target_intel: assert(nj->nir->info.stage == MESA_SHADER_FRAGMENT); - jay_emit_fb_write(b, intr); + jay_emit_fb_write(nj, intr); break; case nir_intrinsic_shader_clock: @@ -2380,8 +2397,8 @@ jay_emit_jump(struct nir_to_jay_state *nj, nir_jump_instr *instr) jay_BREAK(&nj->bld); break; case nir_jump_halt: - // TODO: Do we want a predicated EOT here, or a jump to the end? 
- assert(!"TODO: implement HALT"); + nj->needs_final_halt = true; + jay_HALT(&nj->bld); break; case nir_jump_return: /* Should be lowered */ @@ -2648,6 +2665,8 @@ jay_emit_eot(struct nir_to_jay_state *nj) { jay_builder *b = &nj->bld; + jay_emit_halt_target(nj); + if (mesa_shader_stage_is_compute(nj->nir->info.stage)) { jay_def u0 = nj->payload.u0; diff --git a/src/intel/compiler/jay/jay_opcodes.py b/src/intel/compiler/jay/jay_opcodes.py index 3dbb78ee33f..995dd8fcfc5 100644 --- a/src/intel/compiler/jay/jay_opcodes.py +++ b/src/intel/compiler/jay/jay_opcodes.py @@ -134,7 +134,7 @@ op('schedule_barrier', 0, None, Props.NO_DEST) for n in ['brd', 'illegal', 'goto', 'join', 'if', 'else', 'endif', 'while', 'break', 'cont', 'call', 'calla', 'jmpi', 'ret', - 'loop_once']: + 'loop_once', 'halt', 'halt_target']: op(n, 0, None, Props.NO_DEST) op('send', 4, None, Props.SIDE_EFFECTS, [ diff --git a/src/intel/compiler/jay/jay_to_binary.c b/src/intel/compiler/jay/jay_to_binary.c index 7edccf3b70e..018b94e0e65 100644 --- a/src/intel/compiler/jay/jay_to_binary.c +++ b/src/intel/compiler/jay/jay_to_binary.c @@ -61,6 +61,9 @@ struct jay_codegen { /* struct intel_shader_reloc */ struct util_dynarray relocs; + + /* Index of the final HALT instruction, or -1 if none has been emitted yet. */ + int final_halt_offset; }; static inline gen_operand @@ -264,6 +267,7 @@ static const struct { OP(FBH, FBH, 1), OP(FBL, FBL, 1), OP(FRC, FRC, 1), + OP(HALT, HALT, 0), OP(IF, IF, 0), OP(LANE_ID_8, MOV, 0), OP(LZD, LZD, 1), @@ -575,6 +579,19 @@ emit(struct jay_codegen *jc, } break; + case JAY_OPCODE_HALT_TARGET: + /* HALT temporarily disables channels, and the same instruction is used + * to re-enable them: once all channels are disabled, then they are + * re-enabled again immediately. + * + * So put a HALT right before the "epilogue" of the shader to make sure + * all channels get HALTed, so that this last HALT will re-enable them + * again. 
+ */ + jc->final_halt_offset = jc->num_insts - 1; + gen->opcode = GEN_OP_HALT; + break; + default: break; } @@ -632,6 +649,7 @@ jay_to_binary(jay_shader *s, .insts = rzalloc_array(mem_ctx, gen_inst, total_gen_insts), .insts_cap = total_gen_insts, .output = rzalloc_size(bin, output_capacity), + .final_halt_offset = -1, }; util_dynarray_init(&jc.loop_stack, mem_ctx); util_dynarray_init(&jc.relocs, bin); @@ -672,8 +690,7 @@ jay_to_binary(jay_shader *s, gen->dst.region.hstride = 1; } - int final_halt_offset = -1 /* TODO */; - gen_finish_structured_cf(jc.insts, jc.num_insts, final_halt_offset); + gen_finish_structured_cf(jc.insts, jc.num_insts, jc.final_halt_offset); const unsigned num_relocs = util_dynarray_num_elements(&jc.relocs, struct intel_shader_reloc);