mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-12 16:08:19 +02:00
jay: replace BYTE/WORD_PACK with a simple MOV
Equivalent now that the IR allows it. For the dynamic case:

< (32&W) mov.u16 g0, g38<16,8,2> │ I@1
---
> (32&W) mov.u16 g0, g38<2> │ I@1

For the constant case it's actually better since copyprop can see through it:

< (1&W) mov.u32 u0.0, 0xaaaaaaaa │
< (32&W) mov.u16 g1, u0.0 │ I@1
---
> (32&W) mov.u16 g0, 0xaaaa │

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41872>
This commit is contained in:
parent
6bf1dc6a48
commit
87ce33bbf3
4 changed files with 5 additions and 28 deletions
|
|
@ -816,8 +816,8 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
|
|||
srcs[len++] = jay_as_gpr(b, src0_alpha);
|
||||
|
||||
if (!jay_is_null(omask)) {
|
||||
jay_def packed = jay_alloc_def(b, UGPR, jay_ugpr_per_grf(b->shader));
|
||||
jay_WORD_PACK(b, packed, omask);
|
||||
jay_def packed = jay_alloc_def(b, UGPR, b->shader->dispatch_width / 2);
|
||||
jay_MOV(b, packed, omask)->type = JAY_TYPE_U16;
|
||||
|
||||
for (unsigned i = 0; i < jay_num_values(packed); i++)
|
||||
srcs[len++] = jay_extract(packed, i);
|
||||
|
|
@ -833,8 +833,8 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
|
|||
srcs[len++] = jay_as_gpr(b, depth);
|
||||
|
||||
if (!jay_is_null(stencil)) {
|
||||
jay_def packed = jay_alloc_def(b, UGPR, jay_ugpr_per_grf(b->shader));
|
||||
jay_BYTE_PACK(b, packed, jay_as_gpr(b, stencil));
|
||||
jay_def packed = jay_alloc_def(b, UGPR, b->shader->dispatch_width / 4);
|
||||
jay_MOV(b, packed, stencil)->type = JAY_TYPE_U8;
|
||||
|
||||
/* Split send before stencil due to file difference */
|
||||
assert(split == -1 && "TODO: samplemask and stencil outputs together");
|
||||
|
|
|
|||
|
|
@ -167,15 +167,6 @@ op('extract_byte_per_8lanes', 2, 'u32')
|
|||
op('shr_odd_subspans_by_4', 1, 'u16')
|
||||
op('and_u32_u16', 2, 'u32')
|
||||
|
||||
# Copy the first byte of each lane, treating the destination as if it were
|
||||
# effectively JAY_STRIDE_1 (which doesn't exist). Because the destination
|
||||
# doesn't follow proper lane alignments, this should not write to GPRs.
|
||||
# This is used for stencil outputs in render target write messages.
|
||||
op('byte_pack', 1, 'u32')
|
||||
|
||||
# Similar to byte_pack, but for words
|
||||
op('word_pack', 1, 'u32')
|
||||
|
||||
# Pixel coord calculations. expand_quad replicates out the per-2x2 values from
|
||||
# its source g0.[10...13] and - in the case of SIMD32 - g1.[10...13] into a
|
||||
# per-lane value. Then offset_packed_pixel_coords adds the appropriate packed
|
||||
|
|
|
|||
|
|
@ -153,10 +153,7 @@ propagate_forwards(jay_function *f)
|
|||
}
|
||||
|
||||
/* Don't propagate into phis yet - TODO: File awareness */
|
||||
if (I->op == JAY_OPCODE_PHI_SRC ||
|
||||
I->op == JAY_OPCODE_SEND ||
|
||||
I->op == JAY_OPCODE_BYTE_PACK ||
|
||||
I->op == JAY_OPCODE_WORD_PACK)
|
||||
if (I->op == JAY_OPCODE_PHI_SRC || I->op == JAY_OPCODE_SEND)
|
||||
continue;
|
||||
|
||||
jay_foreach_ssa_src(I, s) {
|
||||
|
|
|
|||
|
|
@ -650,17 +650,6 @@ emit(struct jay_codegen *jc,
|
|||
break;
|
||||
}
|
||||
|
||||
case JAY_OPCODE_BYTE_PACK:
|
||||
jc_MOV(jc, gen_restride(gen_retype(dst, GEN_TYPE_UB), 1, 1, 0),
|
||||
gen_restride(gen_retype(SRC(0), GEN_TYPE_UB), 4, 1, 0));
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_WORD_PACK:
|
||||
jc->state.exec_size = 2 * exec_size;
|
||||
jc_MOV(jc, gen_retype(dst, GEN_TYPE_UW),
|
||||
gen_subscript(jc->devinfo, SRC(0), GEN_TYPE_UW, 0));
|
||||
break;
|
||||
|
||||
case JAY_OPCODE_SHR_ODD_SUBSPANS_BY_4:
|
||||
jc_append2(GEN_OP_SHR, dst, SRC(0), gen_imm_uv(0x44440000));
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue