From 77e7fd0deee21179b8da8d7d740d1dc4e92f7bf3 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 6 May 2025 16:47:44 +0100
Subject: [PATCH] aco: swap the correct v_mov_b32 if there are two of them

Previously, this function tried to swap the instruction which is not
v_mov_b32, so that it doesn't introduce any new OPY-only instructions. If
both were v_mov_b32, it swapped Y. Since this makes Y opy-only, this can't
be done if X is also opy-only.

Signed-off-by: Rhys Perry
Fixes: 408fa33c0928 ("aco/gfx12: don't use second VALU for VOPD's OPX if there is a WaR")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13101
Reviewed-by: Georg Lehmann
Part-of:
(cherry picked from commit 9ca71b52aa5bc2eda1f08149f7780e59858ee27b)
---
 .pick_status.json                         | 2 +-
 src/amd/compiler/aco_scheduler_ilp.cpp    | 6 +++++-
 src/amd/compiler/tests/test_scheduler.cpp | 9 +++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index e8d4d71314d..3e4d39bb462 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -104,7 +104,7 @@
         "description": "aco: swap the correct v_mov_b32 if there are two of them",
         "nominated": true,
         "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "408fa33c092810155baac342de90fd712231aa89",
         "notes": null
diff --git a/src/amd/compiler/aco_scheduler_ilp.cpp b/src/amd/compiler/aco_scheduler_ilp.cpp
index 4d3ab25ef7b..886d2f7b15f 100644
--- a/src/amd/compiler/aco_scheduler_ilp.cpp
+++ b/src/amd/compiler/aco_scheduler_ilp.cpp
@@ -758,13 +758,17 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx, bool prev_can_
    if (x_info.src_banks & y_info.src_banks) {
       assert(x_info.is_commutative || y_info.is_commutative);
       /* Avoid swapping v_mov_b32 because it will become an OPY-only opcode. */
-      if (x_info.op == aco_opcode::v_dual_mov_b32 && !y_info.is_commutative) {
+      if (x_info.op == aco_opcode::v_dual_mov_b32 && y_info.op == aco_opcode::v_dual_mov_b32) {
+         swap_x = x_info.is_opy_only;
+         swap_y = !swap_x;
+      } else if (x_info.op == aco_opcode::v_dual_mov_b32 && !y_info.is_commutative) {
          swap_x = true;
          x_info.is_opy_only = true;
       } else {
          swap_x = x_info.is_commutative && x_info.op != aco_opcode::v_dual_mov_b32;
          swap_y = y_info.is_commutative && !swap_x;
       }
+      y_info.is_opy_only |= swap_y && y_info.op == aco_opcode::v_dual_mov_b32;
    }
 
    if (x_info.is_opy_only) {
diff --git a/src/amd/compiler/tests/test_scheduler.cpp b/src/amd/compiler/tests/test_scheduler.cpp
index bdb75243d1f..f48a7ec91e8 100644
--- a/src/amd/compiler/tests/test_scheduler.cpp
+++ b/src/amd/compiler/tests/test_scheduler.cpp
@@ -162,6 +162,7 @@ BEGIN_TEST(vopd_sched.war)
       PhysReg reg_v0{256};
       PhysReg reg_v1{257};
       PhysReg reg_v3{259};
+      PhysReg reg_v5{261};
 
       //>> p_unit_test 0
       //~gfx11! v1: %0:v[1] = v_dual_add_f32 %0:v[3], %0:v[1] :: v1: %0:v[0] = v_dual_mul_f32 %0:v[1], %0:v[3]
@@ -184,6 +185,14 @@ BEGIN_TEST(vopd_sched.war)
       bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v1, v1), Operand(reg_v3, v1),
                Operand(reg_v1, v1));
 
+      /* Test that we swap the right v_mov_b32. */
+      //>> p_unit_test 2
+      //~gfx11! v1: %0:v[1] = v_dual_mov_b32 %0:v[5] :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[1]
+      //~gfx12! v1: %0:v[0] = v_dual_mov_b32 %0:v[1] :: v1: %0:v[1] = v_dual_add_nc_u32 0, %0:v[5]
+      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
+      bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v1, v1));
+      bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand(reg_v5, v1));
+
       finish_schedule_vopd_test();
    }
 END_TEST