aco: preserve subdword RC when lowering p_insert/p_extract

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12640>
This commit is contained in:
Daniel Schürmann 2021-09-01 15:54:35 +02:00
parent 73481338fe
commit 8bd7e2392b
2 changed files with 14 additions and 29 deletions

View file

@ -2089,15 +2089,8 @@ lower_to_hw_instr(Program* program)
Operand::c32(offset), Operand::c32(bits));
}
} else if (dst.regClass() == v2b) {
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
aco_opcode::v_mov_b32,
(Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)};
sdwa->operands[0] = Operand(op.physReg().advance(-op.physReg().byte()),
RegClass::get(op.regClass().type(), 4));
sdwa->definitions[0] = dst;
sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
sdwa->dst_sel = SubdwordSel::uword;
bld.insert(std::move(sdwa));
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().sel[0] =
SubdwordSel(1, offset / 8, signext);
}
break;
}
@ -2132,14 +2125,8 @@ lower_to_hw_instr(Program* program)
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
} else if (program->chip_class >= GFX9 ||
(op.regClass() != s1 && program->chip_class >= GFX8)) {
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
aco_opcode::v_mov_b32,
(Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)};
sdwa->operands[0] = op;
sdwa->definitions[0] = dst;
sdwa->sel[0] = SubdwordSel::dword;
sdwa->dst_sel = SubdwordSel(bits / 8, offset / 8, false);
bld.insert(std::move(sdwa));
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().dst_sel =
SubdwordSel(bits / 8, offset / 8, false);
} else {
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset),
@ -2147,11 +2134,9 @@ lower_to_hw_instr(Program* program)
}
} else {
assert(dst.regClass() == v2b);
Operand sdwa_op = Operand(op.physReg().advance(-op.physReg().byte()),
RegClass::get(op.regClass().type(), 4));
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), sdwa_op)
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
.instr->sdwa()
.sel[1] = SubdwordSel(1, op.physReg().byte(), false);
.sel[1] = SubdwordSel::ubyte;
}
break;
}

View file

@ -554,15 +554,15 @@ BEGIN_TEST(to_hw_instr.extract)
//>> p_unit_test 4
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
EXT(0, 0)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
if (i != GFX7)
EXT(0, 2)
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
EXT(1, 0)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
if (i != GFX7)
EXT(1, 2)
@ -640,15 +640,15 @@ BEGIN_TEST(to_hw_instr.insert)
//>> p_unit_test 2
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
INS(0, 0)
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
if (i != GFX7)
INS(0, 2)
//~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
INS(1, 0)
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
if (i != GFX7)
INS(1, 2)