diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 1eadb916ccf..530d7d2e177 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2580,10 +2580,17 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) || (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) && instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() && - instr->operands[2].getTemp().type() == RegType::vgpr && instr->operands[1].isTemp() && - instr->operands[1].getTemp().type() == RegType::vgpr && !instr->usesModifiers() && - instr->operands[0].physReg().byte() == 0 && instr->operands[1].physReg().byte() == 0 && - instr->operands[2].physReg().byte() == 0) { + instr->operands[2].getTemp().type() == RegType::vgpr && + ((instr->operands[0].isTemp() && + instr->operands[0].getTemp().type() == RegType::vgpr) || + (instr->operands[1].isTemp() && + instr->operands[1].getTemp().type() == RegType::vgpr)) && + !instr->usesModifiers() && instr->operands[0].physReg().byte() == 0 && + instr->operands[1].physReg().byte() == 0 && instr->operands[2].physReg().byte() == 0) { + if (!instr->operands[1].isTemp() || + instr->operands[1].getTemp().type() != RegType::vgpr) + std::swap(instr->operands[0], instr->operands[1]); + unsigned def_id = instr->definitions[0].tempId(); bool use_vop2 = true; if (ctx.assignments[def_id].affinity) {