mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 13:38:06 +02:00
jay: move deswizzle hack outside of swsb
This will eventually enable better swsb for the simd32 payload code.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40960>
This commit is contained in:
parent
48a24f3c27
commit
6925d9ee23
7 changed files with 39 additions and 39 deletions
|
|
@ -3062,27 +3062,7 @@ setup_fragment_payload(struct nir_to_jay_state *nj, struct payload_builder *p)
|
|||
}
|
||||
|
||||
b->cursor = jay_before_block(nj->after_block);
|
||||
unsigned size = p->offsets[GPR];
|
||||
|
||||
/* Odd: copy both halves to contiguous pair after payload */
|
||||
for (unsigned i = 1; i < size; i += 2) {
|
||||
jay_DESWIZZLE_16(b, size + size + i + 1, 2 + i);
|
||||
jay_DESWIZZLE_16(b, size + size + i + 2, 2 + i + size);
|
||||
}
|
||||
|
||||
/* Even: leave the bottom half in place, copy top half. If size=1 (rare
|
||||
* but possible), this would be a no-op move so skip it.
|
||||
*/
|
||||
if (size > 1) {
|
||||
for (unsigned i = 0; i < size; i += 2) {
|
||||
jay_inst *I = jay_DESWIZZLE_16(b, 2 + i + 1, 2 + size + i);
|
||||
|
||||
/* Stall in between to avoid a write-after-read hazard */
|
||||
if (i == 0) {
|
||||
I->dep = (struct tgl_swsb) { 1, TGL_PIPE_INT };
|
||||
}
|
||||
}
|
||||
}
|
||||
jay_DESWIZZLE(b, p->offsets[GPR]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1004,7 +1004,8 @@ jay_is_no_mask(const jay_inst *I)
|
|||
return jay_inst_is_uniform(I) ||
|
||||
I->broadcast_flag ||
|
||||
I->op == JAY_OPCODE_QUAD_SWIZZLE ||
|
||||
I->op == JAY_OPCODE_DESWIZZLE_16 ||
|
||||
I->op == JAY_OPCODE_DESWIZZLE_EVEN ||
|
||||
I->op == JAY_OPCODE_DESWIZZLE_ODD ||
|
||||
I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS ||
|
||||
I->op == JAY_OPCODE_LANE_ID_8 ||
|
||||
I->op == JAY_OPCODE_LANE_ID_EXPAND;
|
||||
|
|
|
|||
|
|
@ -125,6 +125,29 @@ lower(jay_builder *b, jay_inst *I)
|
|||
return true;
|
||||
}
|
||||
|
||||
case JAY_OPCODE_DESWIZZLE: {
|
||||
unsigned size = jay_deswizzle_size(I);
|
||||
|
||||
/* Odd: copy both halves to contiguous pair after payload */
|
||||
for (unsigned i = 0; i < (size / 2); ++i) {
|
||||
jay_DESWIZZLE_ODD(b, jay_bare_reg(GPR, size + i), jay_bare_reg(GPR, i),
|
||||
jay_bare_reg(GPR, i + ((size + 1) / 2)),
|
||||
!(size & 1));
|
||||
}
|
||||
|
||||
/* Even: leave the bottom half in place, copy top half. If size=1 (rare
|
||||
* but possible), this would be a no-op move so skip it.
|
||||
*/
|
||||
if (size > 1) {
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(size, 2); ++i) {
|
||||
jay_DESWIZZLE_EVEN(b, jay_bare_reg(GPR, i),
|
||||
jay_bare_reg(GPR, (size / 2) + i), size & 1);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -191,7 +191,6 @@ lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access)
|
|||
{
|
||||
struct swsb_state state = { .access = access };
|
||||
jay_inst *last_sync = NULL;
|
||||
bool need_deswizzle_wait = false;
|
||||
|
||||
jay_foreach_inst_in_block_safe(block, I) {
|
||||
enum tgl_pipe exec_pipe = inst_exec_pipe(func->shader->devinfo, I);
|
||||
|
|
@ -199,18 +198,6 @@ lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access)
|
|||
if (I->op == JAY_OPCODE_SYNC) {
|
||||
last_sync = I;
|
||||
continue;
|
||||
} else if (I->op == JAY_OPCODE_DESWIZZLE_16) {
|
||||
need_deswizzle_wait = true;
|
||||
state.ip[TGL_PIPE_INT]++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Force a wait on the deswizzles at the start of the program. XXX: Is
|
||||
* there a cleaner way to deal with this?
|
||||
*/
|
||||
if (need_deswizzle_wait) {
|
||||
dep[TGL_PIPE_INT] = state.ip[TGL_PIPE_INT];
|
||||
need_deswizzle_wait = false;
|
||||
}
|
||||
|
||||
/* Write-after-{write, read} */
|
||||
|
|
|
|||
|
|
@ -145,7 +145,9 @@ op('send', 4, None, Props.SIDE_EFFECTS, [
|
|||
|
||||
op('reloc', 0, 'u32 u64', 0, ['unsigned param', 'unsigned base'])
|
||||
op('preload', 0, 'u32', 0, ['unsigned reg'])
|
||||
op('deswizzle_16', 0, 'u32', Props.NO_DEST, ['unsigned dst', 'unsigned src'])
|
||||
op('deswizzle', 0, 'u32', Props.NO_DEST, ['unsigned size'])
|
||||
op('deswizzle_odd', 2, 'u32', 0, ['bool src2_hi'])
|
||||
op('deswizzle_even', 1, 'u32', 0, ['bool src_hi'])
|
||||
|
||||
# Calculating the lane ID requires multiple power-of-two steps each involving
|
||||
# complex architectural features not modelled in the IR.
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ max_simd_width(const jay_shader *shader, const jay_inst *I)
|
|||
I->op == JAY_OPCODE_EXTRACT_LAYER ||
|
||||
I->op == JAY_OPCODE_EXTRACT_BYTE_PER_8LANES ||
|
||||
I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS ||
|
||||
I->op == JAY_OPCODE_DESWIZZLE_ODD ||
|
||||
I->op == JAY_OPCODE_MUL_32 ||
|
||||
I->op == JAY_OPCODE_SHUFFLE) {
|
||||
return 16;
|
||||
|
|
|
|||
|
|
@ -362,10 +362,16 @@ emit(struct brw_codegen *p,
|
|||
brw_BFN(p, dst, SRC(0), SRC(1), SRC(2), brw_imm_ud(jay_bfn_ctrl(I)));
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_DESWIZZLE_16:
|
||||
case JAY_OPCODE_DESWIZZLE_ODD:
|
||||
bool hi = simd_offs ? true : jay_deswizzle_odd_src2_hi(I);
|
||||
brw_MOV(p, dst,
|
||||
byte_offset(to_brw_reg(f, I, simd_offs, 0, false), hi ? 64 : 0));
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_DESWIZZLE_EVEN:
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_16);
|
||||
brw_MOV(p, retype(xe2_vec8_grf(jay_deswizzle_16_dst(I), 0), BRW_TYPE_UD),
|
||||
retype(xe2_vec8_grf(jay_deswizzle_16_src(I), 0), BRW_TYPE_UD));
|
||||
brw_MOV(p, byte_offset(dst, 64),
|
||||
byte_offset(SRC(0), jay_deswizzle_even_src_hi(I) * 64));
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_CVT: {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue