aco/lower_to_hw: fix 16bit p_insert on gfx8

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28881>
This commit is contained in:
Georg Lehmann 2024-04-23 17:52:05 +02:00 committed by Marge Bot
parent bb80ac7a70
commit 47d824a644
2 changed files with 51 additions and 15 deletions

View file

@ -2683,9 +2683,22 @@ lower_to_hw_instr(Program* program)
}
} else {
assert(dst.regClass() == v2b);
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
->sdwa()
.sel[1] = SubdwordSel::ubyte;
if (!offset) {
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa().sel[0] =
SubdwordSel::ubyte;
} else if (program->gfx_level >= GFX9) {
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
->sdwa()
.sel[1] = SubdwordSel::ubyte;
} else {
assert(offset == 8);
Definition dst_hi = Definition(dst.physReg().advance(1), v1b);
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst_hi, op)->sdwa().sel[0] =
SubdwordSel::ubyte;
uint32_t c = ~(BITFIELD_MASK(offset) << (dst.physReg().byte() * 8));
bld.vop2(aco_opcode::v_and_b32, dst, Operand::c32(c),
Operand(PhysReg(op.physReg().reg()), v1));
}
}
break;
}

View file

@ -715,29 +715,52 @@ BEGIN_TEST(to_hw_instr.insert)
#undef INS
#define INS(idx, def_b) \
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \
Operand::c32(idx), Operand::c32(8u));
#define INS(idx, def_b, op_b) \
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), \
Operand(v1_lo.advance(op_b), v2b), Operand::c32(idx), Operand::c32(8u));
//>> p_unit_test 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:ubyte0
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00
INS(0, 0)
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
INS(0, 0, 0)
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:ubyte0
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504
if (lvl != GFX7)
INS(0, 2)
INS(0, 2, 0)
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:ubyte2
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c02
if (lvl != GFX7)
INS(0, 0, 2)
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:ubyte2
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc020504
if (lvl != GFX7)
INS(0, 2, 2)
//~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte0
//~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1]
//~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c
INS(1, 0)
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
INS(1, 0, 0)
//~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte0
//~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1]
//~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504
if (lvl != GFX7)
INS(1, 2)
INS(1, 2, 0)
//~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte1 dst_preserve src0_sel:ubyte2
//~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1]
//~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte2
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706020c
if (lvl != GFX7)
INS(1, 0, 2)
//~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte2
//~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1]
//~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte2
//~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x20c0504
if (lvl != GFX7)
INS(1, 2, 2)
#undef INS
finish_to_hw_instr_test();