mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 22:40:09 +01:00
aco/lower_to_hw: fix 16bit p_insert on gfx8
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28881>
This commit is contained in:
parent
bb80ac7a70
commit
47d824a644
2 changed files with 51 additions and 15 deletions
|
|
@ -2683,9 +2683,22 @@ lower_to_hw_instr(Program* program)
|
|||
}
|
||||
} else {
|
||||
assert(dst.regClass() == v2b);
|
||||
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
|
||||
->sdwa()
|
||||
.sel[1] = SubdwordSel::ubyte;
|
||||
if (!offset) {
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa().sel[0] =
|
||||
SubdwordSel::ubyte;
|
||||
} else if (program->gfx_level >= GFX9) {
|
||||
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
|
||||
->sdwa()
|
||||
.sel[1] = SubdwordSel::ubyte;
|
||||
} else {
|
||||
assert(offset == 8);
|
||||
Definition dst_hi = Definition(dst.physReg().advance(1), v1b);
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst_hi, op)->sdwa().sel[0] =
|
||||
SubdwordSel::ubyte;
|
||||
uint32_t c = ~(BITFIELD_MASK(offset) << (dst.physReg().byte() * 8));
|
||||
bld.vop2(aco_opcode::v_and_b32, dst, Operand::c32(c),
|
||||
Operand(PhysReg(op.physReg().reg()), v1));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -715,29 +715,52 @@ BEGIN_TEST(to_hw_instr.insert)
|
|||
|
||||
#undef INS
|
||||
|
||||
#define INS(idx, def_b) \
|
||||
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \
|
||||
Operand::c32(idx), Operand::c32(8u));
|
||||
#define INS(idx, def_b, op_b) \
|
||||
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), \
|
||||
Operand(v1_lo.advance(op_b), v2b), Operand::c32(idx), Operand::c32(8u));
|
||||
|
||||
//>> p_unit_test 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
||||
//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
|
||||
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:ubyte0
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00
|
||||
INS(0, 0)
|
||||
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
INS(0, 0, 0)
|
||||
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:ubyte0
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504
|
||||
if (lvl != GFX7)
|
||||
INS(0, 2)
|
||||
INS(0, 2, 0)
|
||||
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:ubyte2
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c02
|
||||
if (lvl != GFX7)
|
||||
INS(0, 0, 2)
|
||||
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:ubyte2
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc020504
|
||||
if (lvl != GFX7)
|
||||
INS(0, 2, 2)
|
||||
//~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
|
||||
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte0
|
||||
//~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1]
|
||||
//~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c
|
||||
INS(1, 0)
|
||||
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
INS(1, 0, 0)
|
||||
//~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte0
|
||||
//~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1]
|
||||
//~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504
|
||||
if (lvl != GFX7)
|
||||
INS(1, 2)
|
||||
|
||||
INS(1, 2, 0)
|
||||
//~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte1 dst_preserve src0_sel:ubyte2
|
||||
//~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1]
|
||||
//~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte2
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706020c
|
||||
if (lvl != GFX7)
|
||||
INS(1, 0, 2)
|
||||
//~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte2
|
||||
//~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1]
|
||||
//~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte2
|
||||
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x20c0504
|
||||
if (lvl != GFX7)
|
||||
INS(1, 2, 2)
|
||||
#undef INS
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue