jay: move deswizzle hack outside of swsb

This will eventually enable better SWSB for the SIMD32 payload code.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40960>
This commit is contained in:
Alyssa Rosenzweig 2026-04-13 19:29:27 -04:00 committed by Marge Bot
parent 48a24f3c27
commit 6925d9ee23
7 changed files with 39 additions and 39 deletions

View file

@ -3062,27 +3062,7 @@ setup_fragment_payload(struct nir_to_jay_state *nj, struct payload_builder *p)
}
b->cursor = jay_before_block(nj->after_block);
unsigned size = p->offsets[GPR];
/* Odd: copy both halves to contiguous pair after payload */
for (unsigned i = 1; i < size; i += 2) {
jay_DESWIZZLE_16(b, size + size + i + 1, 2 + i);
jay_DESWIZZLE_16(b, size + size + i + 2, 2 + i + size);
}
/* Even: leave the bottom half in place, copy top half. If size=1 (rare
* but possible), this would be a no-op move so skip it.
*/
if (size > 1) {
for (unsigned i = 0; i < size; i += 2) {
jay_inst *I = jay_DESWIZZLE_16(b, 2 + i + 1, 2 + size + i);
/* Stall in between to avoid a write-after-read hazard */
if (i == 0) {
I->dep = (struct tgl_swsb) { 1, TGL_PIPE_INT };
}
}
}
jay_DESWIZZLE(b, p->offsets[GPR]);
}
}

View file

@ -1004,7 +1004,8 @@ jay_is_no_mask(const jay_inst *I)
return jay_inst_is_uniform(I) ||
I->broadcast_flag ||
I->op == JAY_OPCODE_QUAD_SWIZZLE ||
I->op == JAY_OPCODE_DESWIZZLE_16 ||
I->op == JAY_OPCODE_DESWIZZLE_EVEN ||
I->op == JAY_OPCODE_DESWIZZLE_ODD ||
I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS ||
I->op == JAY_OPCODE_LANE_ID_8 ||
I->op == JAY_OPCODE_LANE_ID_EXPAND;

View file

@ -125,6 +125,29 @@ lower(jay_builder *b, jay_inst *I)
return true;
}
case JAY_OPCODE_DESWIZZLE: {
unsigned size = jay_deswizzle_size(I);
/* Odd: copy both halves to contiguous pair after payload */
for (unsigned i = 0; i < (size / 2); ++i) {
jay_DESWIZZLE_ODD(b, jay_bare_reg(GPR, size + i), jay_bare_reg(GPR, i),
jay_bare_reg(GPR, i + ((size + 1) / 2)),
!(size & 1));
}
/* Even: leave the bottom half in place, copy top half. If size=1 (rare
* but possible), this would be a no-op move so skip it.
*/
if (size > 1) {
for (unsigned i = 0; i < DIV_ROUND_UP(size, 2); ++i) {
jay_DESWIZZLE_EVEN(b, jay_bare_reg(GPR, i),
jay_bare_reg(GPR, (size / 2) + i), size & 1);
}
}
return true;
}
default:
return false;
}

View file

@ -191,7 +191,6 @@ lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access)
{
struct swsb_state state = { .access = access };
jay_inst *last_sync = NULL;
bool need_deswizzle_wait = false;
jay_foreach_inst_in_block_safe(block, I) {
enum tgl_pipe exec_pipe = inst_exec_pipe(func->shader->devinfo, I);
@ -199,18 +198,6 @@ lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access)
if (I->op == JAY_OPCODE_SYNC) {
last_sync = I;
continue;
} else if (I->op == JAY_OPCODE_DESWIZZLE_16) {
need_deswizzle_wait = true;
state.ip[TGL_PIPE_INT]++;
continue;
}
/* Force a wait on the deswizzles at the start of the program. XXX: Is
* there a cleaner way to deal with this?
*/
if (need_deswizzle_wait) {
dep[TGL_PIPE_INT] = state.ip[TGL_PIPE_INT];
need_deswizzle_wait = false;
}
/* Write-after-{write, read} */

View file

@ -145,7 +145,9 @@ op('send', 4, None, Props.SIDE_EFFECTS, [
op('reloc', 0, 'u32 u64', 0, ['unsigned param', 'unsigned base'])
op('preload', 0, 'u32', 0, ['unsigned reg'])
op('deswizzle_16', 0, 'u32', Props.NO_DEST, ['unsigned dst', 'unsigned src'])
op('deswizzle', 0, 'u32', Props.NO_DEST, ['unsigned size'])
op('deswizzle_odd', 2, 'u32', 0, ['bool src2_hi'])
op('deswizzle_even', 1, 'u32', 0, ['bool src_hi'])
# Calculating the lane ID requires multiple power-of-two steps each involving
# complex architectural features not modelled in the IR.

View file

@ -21,6 +21,7 @@ max_simd_width(const jay_shader *shader, const jay_inst *I)
I->op == JAY_OPCODE_EXTRACT_LAYER ||
I->op == JAY_OPCODE_EXTRACT_BYTE_PER_8LANES ||
I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS ||
I->op == JAY_OPCODE_DESWIZZLE_ODD ||
I->op == JAY_OPCODE_MUL_32 ||
I->op == JAY_OPCODE_SHUFFLE) {
return 16;

View file

@ -362,10 +362,16 @@ emit(struct brw_codegen *p,
brw_BFN(p, dst, SRC(0), SRC(1), SRC(2), brw_imm_ud(jay_bfn_ctrl(I)));
break;
case JAY_OPCODE_DESWIZZLE_16:
case JAY_OPCODE_DESWIZZLE_ODD:
bool hi = simd_offs ? true : jay_deswizzle_odd_src2_hi(I);
brw_MOV(p, dst,
byte_offset(to_brw_reg(f, I, simd_offs, 0, false), hi ? 64 : 0));
break;
case JAY_OPCODE_DESWIZZLE_EVEN:
brw_set_default_exec_size(p, BRW_EXECUTE_16);
brw_MOV(p, retype(xe2_vec8_grf(jay_deswizzle_16_dst(I), 0), BRW_TYPE_UD),
retype(xe2_vec8_grf(jay_deswizzle_16_src(I), 0), BRW_TYPE_UD));
brw_MOV(p, byte_offset(dst, 64),
byte_offset(SRC(0), jay_deswizzle_even_src_hi(I) * 64));
break;
case JAY_OPCODE_CVT: {