diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 1acd899e6c5..445d723feb7 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2959,6 +2959,14 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra parallelcopy, instr); update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0); definition->setFixed(reg); + } else if (instr_info.classes[(int)instr->opcode] == instr_class::wmma && + instr->operands[2].isTemp() && instr->operands[2].isKill() && + instr->operands[2].regClass() == definition->regClass()) { + /* For WMMA, the dest needs to either be equal to operands[2], or not overlap it. + * Here we set a policy of forcing them the same if operands[2] gets killed (and + * otherwise they don't overlap). This may not be optimal if RA would select a + * different location due to affinity, but that gets complicated very quickly. */ + definition->setFixed(instr->operands[2].physReg()); } if (!definition->isFixed()) {