aco: fix assembly of vopc_sdwa writing exec

Previously, the assembler would emit an instruction writing vcc instead.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Fixes: 5ffc73896f ("aco/assembler: Fix v_cmpx with SDWA.")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18077>
This commit is contained in:
Rhys Perry 2022-08-15 17:01:52 +01:00 committed by Marge Bot
parent f60cb8d0af
commit dd105f7c1e
4 changed files with 42 additions and 1 deletions

View file

@ -725,7 +725,8 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
uint32_t encoding = 0;
if (instr->isVOPC()) {
if (instr->definitions[0].physReg() != vcc && instr->definitions[0].physReg() != exec) {
if (instr->definitions[0].physReg() !=
(ctx.gfx_level >= GFX10 && is_cmpx(instr->opcode) ? exec : vcc)) {
encoding |= instr->definitions[0].physReg() << 8;
encoding |= 1 << 15;
}

View file

@ -752,6 +752,13 @@ is_fp_cmp(aco_opcode op)
return get_cmp_info(op, &info) && info.ordered != aco_opcode::num_opcodes;
}
/* Returns true exactly when get_cmp_info() fails for `op`.
 *
 * The CmpInfo object is only a scratch out-parameter required by
 * get_cmp_info(); whatever is written into it is discarded.
 *
 * NOTE(review): presumably get_cmp_info() succeeds only for the plain v_cmp
 * opcodes, so for a VOPC opcode a failed lookup identifies a v_cmpx. Any other
 * opcode that get_cmp_info() rejects would also yield true here -- confirm
 * that callers only pass VOPC opcodes.
 */
bool
is_cmpx(aco_opcode op)
{
   CmpInfo scratch;
   if (get_cmp_info(op, &scratch))
      return false;
   return true;
}
bool
can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op)
{

View file

@ -1812,6 +1812,7 @@ aco_opcode get_f32_cmp(aco_opcode op);
aco_opcode get_vcmpx(aco_opcode op);
unsigned get_cmp_bitsize(aco_opcode op);
bool is_fp_cmp(aco_opcode op);
/* True when get_cmp_info() does not recognise `op`; the assembler uses this on
 * VOPC instructions to decide whether the implicit SDWA destination is exec
 * (GFX10+ v_cmpx) or vcc. */
bool is_cmpx(aco_opcode op);
bool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op);

View file

@ -308,3 +308,35 @@ BEGIN_TEST(assembler.p_constaddr)
aco::lower_to_hw_instr(program.get());
finish_assembler_test();
END_TEST
/* Checks how the assembler encodes the scalar destination of SDWA VOPC
 * instructions on GFX9 and GFX10.
 *
 * NOTE(review): the `//~gfx9...` / `//~gfx10...` lines appear to be
 * expected-disassembly patterns consumed by the test harness, matched in order
 * against the instructions emitted below -- keep them verbatim and keep each
 * one directly above the instruction it describes.
 *
 * In the expected second dword, the byte at bits 8..15 is the destination
 * field: 0x00 when the destination is the implicit one, otherwise the
 * register number with the top bit set (0xac for s[44:45], 0xfe for exec).
 */
BEGIN_TEST(assembler.vopc_sdwa)
for (unsigned i = GFX9; i <= GFX10; i++) {
/* Skip this gfx level if setup_cs() reports failure. */
if (!setup_cs(NULL, (amd_gfx_level)i))
continue;
/* v_cmp writing vcc: implicit destination, field stays 0x00. */
//~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080
//~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(), Operand::zero());
/* v_cmp writing an ordinary SGPR pair (PhysReg 0x2c == s[44:45]): explicit destination 0xac. */
//~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80
//~gfx10! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(), Operand::zero());
/* v_cmp writing exec: must be encoded explicitly (0xfe) rather than as the
 * implicit vcc destination -- this is the case the commit fixes. */
//~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80
//~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686fe80
bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
if (i == GFX10) {
/* GFX10 v_cmpx is built with exec as its only definition; exec is then the
 * implicit destination, so the field is expected to be 0x00. */
//~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
} else {
/* GFX9 v_cmpx is built with two definitions (a scalar destination plus
 * exec); the first definition is encoded like a plain v_cmp destination:
 * 0x00 for vcc, 0xac for s[44:45]. */
//~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2), Operand::zero(), Operand::zero());
//~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80
bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2), Definition(exec, s2), Operand::zero(), Operand::zero());
}
finish_assembler_test();
}
END_TEST