diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 0e5579a26ff..3c4b029c058 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2693,25 +2693,48 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file, if (definition.isFixed()) continue; - /* use affinity if available */ + /* Preferring the more expensive to copy operands doesn't do much for logical phis on GFX11+ + * because it creates a waitcnt anyway. */ + bool avoid_heavy_copies = + ctx.program->gfx_level < GFX11 || phi->opcode == aco_opcode::p_linear_phi; + + std::optional affinity; if (ctx.assignments[definition.tempId()].affinity && ctx.assignments[ctx.assignments[definition.tempId()].affinity].assigned) { - assignment& affinity = ctx.assignments[ctx.assignments[definition.tempId()].affinity]; - assert(affinity.rc == definition.regClass()); - if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity.reg, -1)) { - definition.setFixed(affinity.reg); + affinity.emplace(ctx.assignments[ctx.assignments[definition.tempId()].affinity]); + } + + small_vec, 4> operands; + /* by going backwards, we aim to avoid copies in else-blocks */ + for (int i = phi->operands.size() - 1; i >= 0; i--) { + const Operand& op = phi->operands[i]; + if (!op.isTemp() || !op.isFixed()) + continue; + operands.emplace_back(ctx.assignments[op.tempId()].weight, i); + + /* Don't use the affinity if it might end up creating a waitcnt. */ + if (avoid_heavy_copies && affinity && op.physReg() != affinity->reg && + ctx.assignments[op.tempId()].weight > 0) + affinity.reset(); + } + + /* use affinity if available */ + if (affinity) { + assert(affinity->rc == definition.regClass()); + if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity->reg, -1)) { + definition.setFixed(affinity->reg); register_file.fill(definition); ctx.assignments[definition.tempId()].set(definition); continue; } } - /* by going backwards, we aim to avoid copies in else-blocks */ - for (int i = phi->operands.size() - 1; i >= 0; i--) { - const Operand& op = phi->operands[i]; - if (!op.isTemp() || !op.isFixed()) - continue; + /* If avoid_heavy_copies=false, then this is already sorted how we want it to be. */ + if (avoid_heavy_copies) + std::sort(operands.begin(), operands.end(), std::greater()); + for (auto pair : operands) { + const Operand& op = phi->operands[pair.second]; PhysReg reg = op.physReg(); if (get_reg_specified(ctx, register_file, definition.regClass(), phi, reg, -1)) { definition.setFixed(reg);