mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
aco: preserve subdword RC when lowering p_insert/p_extract
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12640>
This commit is contained in:
parent
73481338fe
commit
8bd7e2392b
2 changed files with 14 additions and 29 deletions
|
|
@@ -2089,15 +2089,8 @@ lower_to_hw_instr(Program* program)
|
|||
Operand::c32(offset), Operand::c32(bits));
|
||||
}
|
||||
} else if (dst.regClass() == v2b) {
|
||||
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
aco_opcode::v_mov_b32,
|
||||
(Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)};
|
||||
sdwa->operands[0] = Operand(op.physReg().advance(-op.physReg().byte()),
|
||||
RegClass::get(op.regClass().type(), 4));
|
||||
sdwa->definitions[0] = dst;
|
||||
sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
|
||||
sdwa->dst_sel = SubdwordSel::uword;
|
||||
bld.insert(std::move(sdwa));
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().sel[0] =
|
||||
SubdwordSel(1, offset / 8, signext);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@@ -2132,14 +2125,8 @@ lower_to_hw_instr(Program* program)
|
|||
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
|
||||
} else if (program->chip_class >= GFX9 ||
|
||||
(op.regClass() != s1 && program->chip_class >= GFX8)) {
|
||||
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
aco_opcode::v_mov_b32,
|
||||
(Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)};
|
||||
sdwa->operands[0] = op;
|
||||
sdwa->definitions[0] = dst;
|
||||
sdwa->sel[0] = SubdwordSel::dword;
|
||||
sdwa->dst_sel = SubdwordSel(bits / 8, offset / 8, false);
|
||||
bld.insert(std::move(sdwa));
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().dst_sel =
|
||||
SubdwordSel(bits / 8, offset / 8, false);
|
||||
} else {
|
||||
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
|
||||
bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset),
|
||||
|
|
@@ -2147,11 +2134,9 @@ lower_to_hw_instr(Program* program)
|
|||
}
|
||||
} else {
|
||||
assert(dst.regClass() == v2b);
|
||||
Operand sdwa_op = Operand(op.physReg().advance(-op.physReg().byte()),
|
||||
RegClass::get(op.regClass().type(), 4));
|
||||
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), sdwa_op)
|
||||
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
|
||||
.instr->sdwa()
|
||||
.sel[1] = SubdwordSel(1, op.physReg().byte(), false);
|
||||
.sel[1] = SubdwordSel::ubyte;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -554,15 +554,15 @@ BEGIN_TEST(to_hw_instr.extract)
|
|||
//>> p_unit_test 4
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
|
||||
EXT(0, 0)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
|
||||
if (i != GFX7)
|
||||
EXT(0, 2)
|
||||
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
|
||||
EXT(1, 0)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
|
||||
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
|
||||
if (i != GFX7)
|
||||
EXT(1, 2)
|
||||
|
||||
|
|
@@ -640,15 +640,15 @@ BEGIN_TEST(to_hw_instr.insert)
|
|||
//>> p_unit_test 2
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
|
||||
//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
|
||||
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
INS(0, 0)
|
||||
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
if (i != GFX7)
|
||||
INS(0, 2)
|
||||
//~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
|
||||
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
INS(1, 0)
|
||||
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
//~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
|
||||
if (i != GFX7)
|
||||
INS(1, 2)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue