From ea92aea9f2dde439cb2a11135b993989a3148e0e Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 7 Feb 2024 11:16:18 +0000
Subject: [PATCH] aco: turn v_mov_b32 into addition to create VOPD instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fossil-db (navi31, wave32):
Totals from 15655 (19.76% of 79242) affected shaders:
Instrs: 10699119 -> 10688239 (-0.10%); split: -0.11%, +0.00%
CodeSize: 61290308 -> 61288596 (-0.00%); split: -0.01%, +0.00%
Latency: 89159743 -> 89150355 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 16966295 -> 16955427 (-0.06%); split: -0.07%, +0.00%
VALU: 5484626 -> 5473993 (-0.19%); split: -0.20%, +0.00%
VOPD: 1446725 -> 1457358 (+0.73%); split: +0.74%, -0.01%

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_scheduler_ilp.cpp | 30 ++++++++++++++++++++------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/amd/compiler/aco_scheduler_ilp.cpp b/src/amd/compiler/aco_scheduler_ilp.cpp
index 097fff0df91..eb513b5c3dd 100644
--- a/src/amd/compiler/aco_scheduler_ilp.cpp
+++ b/src/amd/compiler/aco_scheduler_ilp.cpp
@@ -138,10 +138,7 @@ get_vopd_info(const Instruction* instr)
    case aco_opcode::v_sub_f32: info.op = aco_opcode::v_dual_sub_f32; break;
    case aco_opcode::v_subrev_f32: info.op = aco_opcode::v_dual_subrev_f32; break;
    case aco_opcode::v_mul_legacy_f32: info.op = aco_opcode::v_dual_mul_dx9_zero_f32; break;
-   case aco_opcode::v_mov_b32:
-      info.op = aco_opcode::v_dual_mov_b32;
-      info.is_commutative = false;
-      break;
+   case aco_opcode::v_mov_b32: info.op = aco_opcode::v_dual_mov_b32; break;
    case aco_opcode::v_cndmask_b32:
       info.op = aco_opcode::v_dual_cndmask_b32;
       info.is_commutative = false;
@@ -222,6 +219,14 @@ is_vopd_compatible(const VOPDInfo& a, const VOPDInfo& b)
    if ((a_src_banks & b.src_banks) != 0)
       return false;
 
+   /* If we have to turn v_mov_b32 into v_add_u32 but there is already an OPY-only instruction,
+    * we can't do it.
+    */
+   if (a.op == aco_opcode::v_dual_mov_b32 && !b.is_commutative && b.is_opy_only)
+      return false;
+   if (b.op == aco_opcode::v_dual_mov_b32 && !a.is_commutative && a.is_opy_only)
+      return false;
+
    return true;
 }
 
@@ -634,7 +639,12 @@ get_vopd_opcode_operands(Instruction* instr, const VOPDInfo& info, bool swap, ac
    *num_operands += instr->operands.size();
    std::copy(instr->operands.begin(), instr->operands.end(), operands);
 
-   if (swap) {
+   if (swap && info.op == aco_opcode::v_dual_mov_b32) {
+      *op = aco_opcode::v_dual_add_nc_u32;
+      (*num_operands)++;
+      operands[0] = Operand::zero();
+      operands[1] = instr->operands[0];
+   } else if (swap) {
       if (info.op == aco_opcode::v_dual_sub_f32)
          *op = aco_opcode::v_dual_subrev_f32;
       else if (info.op == aco_opcode::v_dual_subrev_f32)
@@ -654,8 +664,14 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx)
    bool swap_x = false, swap_y = false;
    if (x_info.src_banks & y_info.src_banks) {
       assert(x_info.is_commutative || y_info.is_commutative);
-      swap_x = x_info.is_commutative;
-      swap_y = y_info.is_commutative && !swap_x;
+      /* Avoid swapping v_mov_b32 because it will become an OPY-only opcode. */
+      if (x_info.op == aco_opcode::v_dual_mov_b32 && !y_info.is_commutative) {
+         swap_x = true;
+         x_info.is_opy_only = true;
+      } else {
+         swap_x = x_info.is_commutative && x_info.op != aco_opcode::v_dual_mov_b32;
+         swap_y = y_info.is_commutative && !swap_x;
+      }
    }
 
    if (x_info.is_opy_only) {