From c5f02a1cd3b110bafff0fc55064938604bf539ee Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 13 Jan 2021 16:35:01 +0000
Subject: [PATCH] aco: swap multiplication operands if needed to create v_fmac_f32/etc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For v_pk_fma_f32 and v_fma_f32 from nir_op_ffma, we don't try to put scalars in the first operand.

No fossil-db changes.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 1eadb916ccf..530d7d2e177 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2580,10 +2580,17 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra
 (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) ||
 (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
 instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
- instr->operands[2].getTemp().type() == RegType::vgpr && instr->operands[1].isTemp() &&
- instr->operands[1].getTemp().type() == RegType::vgpr && !instr->usesModifiers() &&
- instr->operands[0].physReg().byte() == 0 && instr->operands[1].physReg().byte() == 0 &&
- instr->operands[2].physReg().byte() == 0) {
+ instr->operands[2].getTemp().type() == RegType::vgpr &&
+ ((instr->operands[0].isTemp() &&
+ instr->operands[0].getTemp().type() == RegType::vgpr) ||
+ (instr->operands[1].isTemp() &&
+ instr->operands[1].getTemp().type() == RegType::vgpr)) &&
+ !instr->usesModifiers() && instr->operands[0].physReg().byte() == 0 &&
+ instr->operands[1].physReg().byte() == 0 && instr->operands[2].physReg().byte() == 0) {
+ if (!instr->operands[1].isTemp() ||
+ instr->operands[1].getTemp().type() != RegType::vgpr)
+ std::swap(instr->operands[0], instr->operands[1]);
+
 unsigned def_id = instr->definitions[0].tempId();
 bool use_vop2 = true;
 if (ctx.assignments[def_id].affinity) {