From dd105f7c1e3600f1532279e61f6ed38e5c826728 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 15 Aug 2022 17:01:52 +0100 Subject: [PATCH] aco: fix assembly of vopc_sdwa writing exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We would assemble an instruction writing vcc instead. Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Reviewed-by: Timur Kristóf Fixes: 5ffc73896f3 ("aco/assembler: Fix v_cmpx with SDWA.") Part-of: --- src/amd/compiler/aco_assembler.cpp | 3 ++- src/amd/compiler/aco_ir.cpp | 7 +++++ src/amd/compiler/aco_ir.h | 1 + src/amd/compiler/tests/test_assembler.cpp | 32 +++++++++++++++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index f15ab451fd0..4c9c69627ae 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -725,7 +725,8 @@ emit_instruction(asm_context& ctx, std::vector& out, Instruction* inst uint32_t encoding = 0; if (instr->isVOPC()) { - if (instr->definitions[0].physReg() != vcc && instr->definitions[0].physReg() != exec) { + if (instr->definitions[0].physReg() != + (ctx.gfx_level >= GFX10 && is_cmpx(instr->opcode) ? 
exec : vcc)) { encoding |= instr->definitions[0].physReg() << 8; encoding |= 1 << 15; } diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 028990920cb..4b40b6dd83a 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -752,6 +752,13 @@ is_fp_cmp(aco_opcode op) return get_cmp_info(op, &info) && info.ordered != aco_opcode::num_opcodes; } +bool +is_cmpx(aco_opcode op) +{ + CmpInfo info; + return !get_cmp_info(op, &info); +} + bool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op) { diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index f22438bd86e..4394cee2284 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1812,6 +1812,7 @@ aco_opcode get_f32_cmp(aco_opcode op); aco_opcode get_vcmpx(aco_opcode op); unsigned get_cmp_bitsize(aco_opcode op); bool is_fp_cmp(aco_opcode op); +bool is_cmpx(aco_opcode op); bool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op); diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp index a368e9ff934..3e1669f44e8 100644 --- a/src/amd/compiler/tests/test_assembler.cpp +++ b/src/amd/compiler/tests/test_assembler.cpp @@ -308,3 +308,35 @@ BEGIN_TEST(assembler.p_constaddr) aco::lower_to_hw_instr(program.get()); finish_assembler_test(); END_TEST + +BEGIN_TEST(assembler.vopc_sdwa) + for (unsigned i = GFX9; i <= GFX10; i++) { + if (!setup_cs(NULL, (amd_gfx_level)i)) + continue; + + //~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080 + //~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 86860080 + bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(), Operand::zero()); + + //~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80 + //~gfx10! 
v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80 + bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(), Operand::zero()); + + //~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80 + //~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686fe80 + bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero()); + + if (i == GFX10) { + //~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080 + bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero()); + } else { + //~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080 + bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2), Operand::zero(), Operand::zero()); + + //~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80 + bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2), Definition(exec, s2), Operand::zero(), Operand::zero()); + } + + finish_assembler_test(); + } +END_TEST