aco: don't combine linear and normal VGPR copies

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2025-12-23 19:50:11 +01:00 · 2024-02-16 11:09:22 +00:00 · 2024-02-16 11:09:22 +00:00 · f99443a68b
commit f99443a68b
parent 599de4b47c
2 changed files with 53 additions and 0 deletions
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1767,6 +1767,9 @@ try_coalesce_copies(lower_context* ctx, std::map<PhysReg, copy_operation>& copy_
       copy.op.isConstant() != other->second.op.isConstant())
      return;

+   if (other->second.def.regClass().is_linear_vgpr() != copy.def.regClass().is_linear_vgpr())
+      return;
+
   /* don't create 64-bit copies before GFX10 */
   if (copy.bytes >= 4 && copy.def.regClass().type() == RegType::vgpr &&
       ctx->program->gfx_level < GFX10)
--- a/src/amd/compiler/tests/test_to_hw_instr.cpp
+++ b/src/amd/compiler/tests/test_to_hw_instr.cpp
@@ -839,6 +839,56 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3)
   finish_to_hw_instr_test();
 END_TEST

+BEGIN_TEST(to_hw_instr.copy_linear_vgpr_coalesce) /* linear/normal VGPR copies must not merge */
+   if (!setup_cs(NULL, GFX10)) /* 64-bit VGPR copies are not created before GFX10 */
+      return;
+
+   PhysReg reg_v0{256};
+   PhysReg reg_v1{256 + 1};
+   PhysReg reg_v4{256 + 4};
+   PhysReg reg_v5{256 + 5};
+   RegClass v1_linear = v1.as_linear(); /* linear counterpart of the v1 register class */
+
+   //>> p_unit_test 0
+   //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
+   //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
+   //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
+   //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); /* case 0: both copies linear */
+
+   Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
+                                   Definition(reg_v1, v1_linear), Operand(reg_v4, v1_linear),
+                                   Operand(reg_v5, v1_linear));
+   instr->pseudo().scratch_sgpr = m0;
+
+   //! p_unit_test 1
+   //! lv1: %0:v[0] = v_mov_b32 %0:v[4]
+   //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
+   //! lv1: %0:v[0] = v_mov_b32 %0:v[4]
+   //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
+   //! v1: %0:v[1] = v_mov_b32 %0:v[5]
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); /* case 1: linear v0, normal v1 */
+
+   instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
+                      Definition(reg_v1, v1), Operand(reg_v4, v1_linear), Operand(reg_v5, v1));
+   instr->pseudo().scratch_sgpr = m0;
+
+   //! p_unit_test 2
+   //! v1: %0:v[0] = v_mov_b32 %0:v[4]
+   //! lv1: %0:v[1] = v_mov_b32 %0:v[5]
+   //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
+   //! lv1: %0:v[1] = v_mov_b32 %0:v[5]
+   //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); /* case 2: normal v0, linear v1 */
+
+   instr =
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1), Definition(reg_v1, v1_linear),
+                 Operand(reg_v4, v1), Operand(reg_v5, v1_linear));
+   instr->pseudo().scratch_sgpr = m0;
+
+   finish_to_hw_instr_test();
+END_TEST
+
 BEGIN_TEST(to_hw_instr.pack2x16_constant)
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};