diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index 64f64a78116..a194586a86d 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -3062,27 +3062,7 @@ setup_fragment_payload(struct nir_to_jay_state *nj, struct payload_builder *p) } b->cursor = jay_before_block(nj->after_block); - unsigned size = p->offsets[GPR]; - - /* Odd: copy both halves to contiguous pair after payload */ - for (unsigned i = 1; i < size; i += 2) { - jay_DESWIZZLE_16(b, size + size + i + 1, 2 + i); - jay_DESWIZZLE_16(b, size + size + i + 2, 2 + i + size); - } - - /* Even: leave the bottom half in place, copy top half. If size=1 (rare - * but possible), this would be a no-op move so skip it. - */ - if (size > 1) { - for (unsigned i = 0; i < size; i += 2) { - jay_inst *I = jay_DESWIZZLE_16(b, 2 + i + 1, 2 + size + i); - - /* Stall in between to avoid a write-after-read hazard */ - if (i == 0) { - I->dep = (struct tgl_swsb) { 1, TGL_PIPE_INT }; - } - } - } + jay_DESWIZZLE(b, p->offsets[GPR]); } } diff --git a/src/intel/compiler/jay/jay_ir.h b/src/intel/compiler/jay/jay_ir.h index 2d0a9037be7..8307044ef8a 100644 --- a/src/intel/compiler/jay/jay_ir.h +++ b/src/intel/compiler/jay/jay_ir.h @@ -1004,7 +1004,8 @@ jay_is_no_mask(const jay_inst *I) return jay_inst_is_uniform(I) || I->broadcast_flag || I->op == JAY_OPCODE_QUAD_SWIZZLE || - I->op == JAY_OPCODE_DESWIZZLE_16 || + I->op == JAY_OPCODE_DESWIZZLE_EVEN || + I->op == JAY_OPCODE_DESWIZZLE_ODD || I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS || I->op == JAY_OPCODE_LANE_ID_8 || I->op == JAY_OPCODE_LANE_ID_EXPAND; diff --git a/src/intel/compiler/jay/jay_lower_post_ra.c b/src/intel/compiler/jay/jay_lower_post_ra.c index db8661b011d..945e9cceaf9 100644 --- a/src/intel/compiler/jay/jay_lower_post_ra.c +++ b/src/intel/compiler/jay/jay_lower_post_ra.c @@ -125,6 +125,29 @@ lower(jay_builder *b, jay_inst *I) return true; } + case JAY_OPCODE_DESWIZZLE: { + unsigned size = jay_deswizzle_size(I); + + /* Odd: copy both halves to contiguous pair after payload */ + for (unsigned i = 0; i < (size / 2); ++i) { + jay_DESWIZZLE_ODD(b, jay_bare_reg(GPR, size + i), jay_bare_reg(GPR, i), + jay_bare_reg(GPR, i + ((size + 1) / 2)), + !(size & 1)); + } + + /* Even: leave the bottom half in place, copy top half. If size=1 (rare + * but possible), this would be a no-op move so skip it. + */ + if (size > 1) { + for (unsigned i = 0; i < DIV_ROUND_UP(size, 2); ++i) { + jay_DESWIZZLE_EVEN(b, jay_bare_reg(GPR, i), + jay_bare_reg(GPR, (size / 2) + i), size & 1); + } + } + + return true; + } + default: return false; } diff --git a/src/intel/compiler/jay/jay_lower_scoreboard.c b/src/intel/compiler/jay/jay_lower_scoreboard.c index 06244e17252..dafac9d130b 100644 --- a/src/intel/compiler/jay/jay_lower_scoreboard.c +++ b/src/intel/compiler/jay/jay_lower_scoreboard.c @@ -191,7 +191,6 @@ lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access) { struct swsb_state state = { .access = access }; jay_inst *last_sync = NULL; - bool need_deswizzle_wait = false; jay_foreach_inst_in_block_safe(block, I) { enum tgl_pipe exec_pipe = inst_exec_pipe(func->shader->devinfo, I); @@ -199,18 +198,6 @@ lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access) if (I->op == JAY_OPCODE_SYNC) { last_sync = I; continue; - } else if (I->op == JAY_OPCODE_DESWIZZLE_16) { - need_deswizzle_wait = true; - state.ip[TGL_PIPE_INT]++; - continue; - } - - /* Force a wait on the deswizzles at the start of the program. XXX: Is - * there a cleaner way to deal with this? - */ - if (need_deswizzle_wait) { - dep[TGL_PIPE_INT] = state.ip[TGL_PIPE_INT]; - need_deswizzle_wait = false; } /* Write-after-{write, read} */ diff --git a/src/intel/compiler/jay/jay_opcodes.py b/src/intel/compiler/jay/jay_opcodes.py index 928d1e90b04..448da0768f4 100644 --- a/src/intel/compiler/jay/jay_opcodes.py +++ b/src/intel/compiler/jay/jay_opcodes.py @@ -145,7 +145,9 @@ op('send', 4, None, Props.SIDE_EFFECTS, [ op('reloc', 0, 'u32 u64', 0, ['unsigned param', 'unsigned base']) op('preload', 0, 'u32', 0, ['unsigned reg']) -op('deswizzle_16', 0, 'u32', Props.NO_DEST, ['unsigned dst', 'unsigned src']) +op('deswizzle', 0, 'u32', Props.NO_DEST, ['unsigned size']) +op('deswizzle_odd', 2, 'u32', 0, ['bool src2_hi']) +op('deswizzle_even', 1, 'u32', 0, ['bool src_hi']) # Calculating the lane ID requires multiple power-of-two steps each involving # complex architectural features not modelled in the IR. diff --git a/src/intel/compiler/jay/jay_simd_width.c b/src/intel/compiler/jay/jay_simd_width.c index 41adb3859bc..0c0fcfdb77c 100644 --- a/src/intel/compiler/jay/jay_simd_width.c +++ b/src/intel/compiler/jay/jay_simd_width.c @@ -21,6 +21,7 @@ max_simd_width(const jay_shader *shader, const jay_inst *I) I->op == JAY_OPCODE_EXTRACT_LAYER || I->op == JAY_OPCODE_EXTRACT_BYTE_PER_8LANES || I->op == JAY_OPCODE_OFFSET_PACKED_PIXEL_COORDS || + I->op == JAY_OPCODE_DESWIZZLE_ODD || I->op == JAY_OPCODE_MUL_32 || I->op == JAY_OPCODE_SHUFFLE) { return 16; diff --git a/src/intel/compiler/jay/jay_to_binary.c b/src/intel/compiler/jay/jay_to_binary.c index 1dbdf82e0fc..2742ca37a09 100644 --- a/src/intel/compiler/jay/jay_to_binary.c +++ b/src/intel/compiler/jay/jay_to_binary.c @@ -362,10 +362,16 @@ emit(struct brw_codegen *p, brw_BFN(p, dst, SRC(0), SRC(1), SRC(2), brw_imm_ud(jay_bfn_ctrl(I))); break; - case JAY_OPCODE_DESWIZZLE_16: + case JAY_OPCODE_DESWIZZLE_ODD: + bool hi = simd_offs ? true : jay_deswizzle_odd_src2_hi(I); + brw_MOV(p, dst, + byte_offset(to_brw_reg(f, I, simd_offs, 0, false), hi ? 64 : 0)); + break; + + case JAY_OPCODE_DESWIZZLE_EVEN: brw_set_default_exec_size(p, BRW_EXECUTE_16); - brw_MOV(p, retype(xe2_vec8_grf(jay_deswizzle_16_dst(I), 0), BRW_TYPE_UD), - retype(xe2_vec8_grf(jay_deswizzle_16_src(I), 0), BRW_TYPE_UD)); + brw_MOV(p, byte_offset(dst, 64), + byte_offset(SRC(0), jay_deswizzle_even_src_hi(I) * 64)); break; case JAY_OPCODE_CVT: {