From f99443a68b743d866cd6fc79d29d2f549f200a23 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Fri, 16 Feb 2024 11:09:22 +0000
Subject: [PATCH] aco: don't combine linear and normal VGPR copies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Cc: mesa-stable
Part-of:
---
 src/amd/compiler/aco_lower_to_hw_instr.cpp  |  3 ++
 src/amd/compiler/tests/test_to_hw_instr.cpp | 50 +++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 27787c7322f..03282ea2857 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1767,6 +1767,9 @@ try_coalesce_copies(lower_context* ctx, std::map<PhysReg, copy_operation>& copy_
        copy.op.isConstant() != other->second.op.isConstant())
       return;
 
+   if (other->second.def.regClass().is_linear_vgpr() != copy.def.regClass().is_linear_vgpr())
+      return;
+
    /* don't create 64-bit copies before GFX10 */
    if (copy.bytes >= 4 && copy.def.regClass().type() == RegType::vgpr &&
        ctx->program->gfx_level < GFX10)
diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp
index e14ff7e56f4..4feb610b3e4 100644
--- a/src/amd/compiler/tests/test_to_hw_instr.cpp
+++ b/src/amd/compiler/tests/test_to_hw_instr.cpp
@@ -839,6 +839,56 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3)
    finish_to_hw_instr_test();
 END_TEST
 
+BEGIN_TEST(to_hw_instr.copy_linear_vgpr_coalesce)
+   if (!setup_cs(NULL, GFX10))
+      return;
+
+   PhysReg reg_v0{256};
+   PhysReg reg_v1{256 + 1};
+   PhysReg reg_v4{256 + 4};
+   PhysReg reg_v5{256 + 5};
+   RegClass v1_linear = v1.as_linear();
+
+   //>> p_unit_test 0
+   //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
+   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
+   //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
+   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
+
+   Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
+                                   Definition(reg_v1, v1_linear), Operand(reg_v4, v1_linear),
+                                   Operand(reg_v5, v1_linear));
+   instr->pseudo().scratch_sgpr = m0;
+
+   //! p_unit_test 1
+   //! lv1: %0:v[0] = v_mov_b32 %0:v[4]
+   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
+   //! lv1: %0:v[0] = v_mov_b32 %0:v[4]
+   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
+   //! v1: %0:v[1] = v_mov_b32 %0:v[5]
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
+
+   instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
+                      Definition(reg_v1, v1), Operand(reg_v4, v1_linear), Operand(reg_v5, v1));
+   instr->pseudo().scratch_sgpr = m0;
+
+   //! p_unit_test 2
+   //! v1: %0:v[0] = v_mov_b32 %0:v[4]
+   //! lv1: %0:v[1] = v_mov_b32 %0:v[5]
+   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
+   //! lv1: %0:v[1] = v_mov_b32 %0:v[5]
+   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
+
+   instr =
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1), Definition(reg_v1, v1_linear),
+                 Operand(reg_v4, v1), Operand(reg_v5, v1_linear));
+   instr->pseudo().scratch_sgpr = m0;
+
+   finish_to_hw_instr_test();
+END_TEST
+
 BEGIN_TEST(to_hw_instr.pack2x16_constant)
    PhysReg v0_lo{256};
    PhysReg v0_hi{256