aco: don't combine linear and normal VGPR copies

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
This commit is contained in:
Rhys Perry 2024-02-16 11:09:22 +00:00 committed by Marge Bot
parent 599de4b47c
commit f99443a68b
2 changed files with 53 additions and 0 deletions

View file

@ -1767,6 +1767,9 @@ try_coalesce_copies(lower_context* ctx, std::map<PhysReg, copy_operation>& copy_
copy.op.isConstant() != other->second.op.isConstant())
return;
if (other->second.def.regClass().is_linear_vgpr() != copy.def.regClass().is_linear_vgpr())
return;
/* don't create 64-bit copies before GFX10 */
if (copy.bytes >= 4 && copy.def.regClass().type() == RegType::vgpr &&
ctx->program->gfx_level < GFX10)

View file

@ -839,6 +839,56 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3)
finish_to_hw_instr_test();
END_TEST
BEGIN_TEST(to_hw_instr.copy_linear_vgpr_coalesce)
if (!setup_cs(NULL, GFX10))
return;
PhysReg reg_v0{256};
PhysReg reg_v1{256 + 1};
PhysReg reg_v4{256 + 4};
PhysReg reg_v5{256 + 5};
RegClass v1_linear = v1.as_linear();
//>> p_unit_test 0
//! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
//! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
Definition(reg_v1, v1_linear), Operand(reg_v4, v1_linear),
Operand(reg_v5, v1_linear));
instr->pseudo().scratch_sgpr = m0;
//! p_unit_test 1
//! lv1: %0:v[0] = v_mov_b32 %0:v[4]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
//! lv1: %0:v[0] = v_mov_b32 %0:v[4]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
//! v1: %0:v[1] = v_mov_b32 %0:v[5]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
Definition(reg_v1, v1), Operand(reg_v4, v1_linear), Operand(reg_v5, v1));
instr->pseudo().scratch_sgpr = m0;
//! p_unit_test 2
//! v1: %0:v[0] = v_mov_b32 %0:v[4]
//! lv1: %0:v[1] = v_mov_b32 %0:v[5]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
//! lv1: %0:v[1] = v_mov_b32 %0:v[5]
//! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
instr =
bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1), Definition(reg_v1, v1_linear),
Operand(reg_v4, v1), Operand(reg_v5, v1_linear));
instr->pseudo().scratch_sgpr = m0;
finish_to_hw_instr_test();
END_TEST
BEGIN_TEST(to_hw_instr.pack2x16_constant)
PhysReg v0_lo{256};
PhysReg v0_hi{256};