From 6c142f7edcd7589d4433f2e2de19bd5be308ea4d Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 8 May 2026 15:21:01 -0700 Subject: [PATCH] jay: Implement sample mask writes Part-of: --- src/compiler/nir/nir_intrinsics.py | 2 +- src/intel/compiler/jay/jay_from_nir.c | 14 ++++++++++++-- src/intel/compiler/jay/jay_nir.c | 1 - src/intel/compiler/jay/jay_opcodes.py | 3 +++ src/intel/compiler/jay/jay_opt_propagate.c | 3 ++- src/intel/compiler/jay/jay_to_binary.c | 5 +++++ 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 1c1ee70c6b1..2bcbcca5a37 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2662,7 +2662,7 @@ intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32], # Write a render target # src[] = { color, src0_alpha, omask, depth, stencil, predicate } -intrinsic("store_render_target_intel", [4, 1, 1, 1, 1, 1], indices=[TARGET], bit_sizes=[32, 32, 16, 32, 32, 1]) +intrinsic("store_render_target_intel", [4, 1, 1, 1, 1, 1], indices=[TARGET], bit_sizes=[32, 32, 32, 32, 32, 1]) # Shuffle with an offset in bytes instead of a lane index. # src[] = { payload, lane offset in bytes } diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index ab233b769a6..9c359f5fad5 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -809,7 +809,16 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr) if (!jay_is_null(src0_alpha)) srcs[len++] = jay_as_gpr(b, src0_alpha); - assert(jay_is_null(omask) && "TODO: samplemask"); + if (!jay_is_null(omask)) { + jay_def packed = jay_alloc_def(b, UGPR, jay_ugpr_per_grf(b->shader)); + jay_WORD_PACK(b, packed, omask); + + for (unsigned i = 0; i < jay_num_values(packed); i++) + srcs[len++] = jay_extract(packed, i); + + /* Split send after omask due to file difference */ + split = len; + } for (unsigned i = 0; i < 4; i++) srcs[len++] = jay_as_gpr(b, jay_extract(colour, i)); @@ -821,7 +830,8 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr) jay_def packed = jay_alloc_def(b, UGPR, jay_ugpr_per_grf(b->shader)); jay_BYTE_PACK(b, packed, jay_as_gpr(b, stencil)); - /* Split send before stencil */ + /* Split send before stencil due to file difference */ + assert(split == -1 && "TODO: samplemask and stencil outputs together"); split = len; for (unsigned i = 0; i < jay_num_values(packed); i++) diff --git a/src/intel/compiler/jay/jay_nir.c b/src/intel/compiler/jay/jay_nir.c index e3d4b588616..98e9f2f735a 100644 --- a/src/intel/compiler/jay/jay_nir.c +++ b/src/intel/compiler/jay/jay_nir.c @@ -153,7 +153,6 @@ collect_fragment_output(nir_builder *b, nir_intrinsic_instr *intr, void *ctx_) } else if (loc == FRAG_RESULT_STENCIL) { out = &ctx->stencil; } else if (loc == FRAG_RESULT_SAMPLE_MASK) { - UNREACHABLE("todo"); out = &ctx->sample_mask; } else { UNREACHABLE("invalid location"); diff --git a/src/intel/compiler/jay/jay_opcodes.py b/src/intel/compiler/jay/jay_opcodes.py index 548557ae3c5..82a9368850d 100644 --- a/src/intel/compiler/jay/jay_opcodes.py +++ b/src/intel/compiler/jay/jay_opcodes.py @@ -172,6 +172,9 @@ op('and_u32_u16', 2, 'u32') # This is used for stencil outputs in render target write messages. op('byte_pack', 1, 'u32') +# Similar to byte_pack, but for words +op('word_pack', 1, 'u32') + # Pixel coord calculations. expand_quad replicates out the per-2x2 values from # its source g0.[10...13] and - in the case of SIMD32 - g1.[10...13] into a # per-lane value. Then offset_packed_pixel_coords adds the appropriate packed diff --git a/src/intel/compiler/jay/jay_opt_propagate.c b/src/intel/compiler/jay/jay_opt_propagate.c index cf37c8369f6..8d3bea4f066 100644 --- a/src/intel/compiler/jay/jay_opt_propagate.c +++ b/src/intel/compiler/jay/jay_opt_propagate.c @@ -155,7 +155,8 @@ propagate_forwards(jay_function *f) /* Don't propagate into phis yet - TODO: File awareness */ if (I->op == JAY_OPCODE_PHI_SRC || I->op == JAY_OPCODE_SEND || - I->op == JAY_OPCODE_BYTE_PACK) + I->op == JAY_OPCODE_BYTE_PACK || + I->op == JAY_OPCODE_WORD_PACK) continue; jay_foreach_ssa_src(I, s) { diff --git a/src/intel/compiler/jay/jay_to_binary.c b/src/intel/compiler/jay/jay_to_binary.c index 6fdfef4780c..7ebfd7575a8 100644 --- a/src/intel/compiler/jay/jay_to_binary.c +++ b/src/intel/compiler/jay/jay_to_binary.c @@ -493,6 +493,11 @@ emit(struct brw_codegen *p, stride(retype(SRC(0), BRW_TYPE_UB), 4, 1, 0)); break; + case JAY_OPCODE_WORD_PACK: + brw_set_default_exec_size(p, util_logbase2(2 * exec_size)); + brw_MOV(p, retype(dst, BRW_TYPE_UW), subscript(SRC(0), BRW_TYPE_UW, 0)); + break; + case JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4: brw_SHR(p, dst, SRC(0), brw_imm_uv(0x44440000)); break;