From d8458c05591fae42e2b3d2b1c62ac26ef7918a23 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 15 Jul 2023 19:43:08 +0200 Subject: [PATCH] aco: Make RA understand WMMA instructions. Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 1acd899e6c5..445d723feb7 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2959,6 +2959,14 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra parallelcopy, instr); update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0); definition->setFixed(reg); + } else if (instr_info.classes[(int)instr->opcode] == instr_class::wmma && + instr->operands[2].isTemp() && instr->operands[2].isKill() && + instr->operands[2].regClass() == definition->regClass()) { + /* For WMMA, the dest needs to either be equal to operands[2], or not overlap it. + * Here we set a policy of forcing them the same if operands[2] gets killed (and + * otherwise they don't overlap). This may not be optimal if RA would select a + * different location due to affinity, but that gets complicated very quickly. */ + definition->setFixed(instr->operands[2].physReg()); } if (!definition->isFixed()) {