From 52a8ecffa8387f01411b045f2909368ab4945fdf Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 28 Oct 2025 16:43:53 +0000 Subject: [PATCH] aco/ra: prefer phi operands which don't create waitcnt fossil-db (navi31): Totals from 88 (0.11% of 79825) affected shaders: Instrs: 338969 -> 338866 (-0.03%); split: -0.11%, +0.08% CodeSize: 1769436 -> 1769032 (-0.02%); split: -0.10%, +0.08% Latency: 2616059 -> 2616218 (+0.01%); split: -0.02%, +0.02% InvThroughput: 513410 -> 513400 (-0.00%); split: -0.01%, +0.01% SClause: 9081 -> 9083 (+0.02%); split: -0.03%, +0.06% Copies: 38796 -> 38716 (-0.21%); split: -0.86%, +0.66% Branches: 10726 -> 10734 (+0.07%); split: -0.05%, +0.12% SALU: 48527 -> 48447 (-0.16%); split: -0.69%, +0.53% fossil-db (navi21): Totals from 34492 (43.21% of 79825) affected shaders: Instrs: 22913622 -> 22864147 (-0.22%); split: -0.25%, +0.04% CodeSize: 120028404 -> 119838516 (-0.16%); split: -0.19%, +0.03% VGPRs: 1392656 -> 1393248 (+0.04%); split: -0.07%, +0.11% Latency: 100282167 -> 100021714 (-0.26%); split: -0.57%, +0.31% InvThroughput: 22752202 -> 22818933 (+0.29%); split: -0.12%, +0.41% VClause: 420148 -> 420069 (-0.02%); split: -0.29%, +0.27% SClause: 447545 -> 447557 (+0.00%) Copies: 2243057 -> 2243914 (+0.04%); split: -0.43%, +0.46% Branches: 724887 -> 725208 (+0.04%); split: -0.02%, +0.07% VALU: 13235472 -> 13236299 (+0.01%); split: -0.07%, +0.08% SALU: 4072375 -> 4073031 (+0.02%); split: -0.02%, +0.03% Signed-off-by: Rhys Perry --- src/amd/compiler/aco_register_allocation.cpp | 43 +++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 0e5579a26ff..3c4b029c058 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2693,25 +2693,48 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file, if (definition.isFixed()) continue; - /* use affinity if available */ + /* Preferring the more expensive to copy operands doesn't do much for logical phis on GFX11+ + * because it creates a waitcnt anyway. */ + bool avoid_heavy_copies = + ctx.program->gfx_level < GFX11 || phi->opcode == aco_opcode::p_linear_phi; + + std::optional affinity; if (ctx.assignments[definition.tempId()].affinity && ctx.assignments[ctx.assignments[definition.tempId()].affinity].assigned) { - assignment& affinity = ctx.assignments[ctx.assignments[definition.tempId()].affinity]; - assert(affinity.rc == definition.regClass()); - if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity.reg, -1)) { - definition.setFixed(affinity.reg); + affinity.emplace(ctx.assignments[ctx.assignments[definition.tempId()].affinity]); + } + + small_vec, 4> operands; + /* by going backwards, we aim to avoid copies in else-blocks */ + for (int i = phi->operands.size() - 1; i >= 0; i--) { + const Operand& op = phi->operands[i]; + if (!op.isTemp() || !op.isFixed()) + continue; + operands.emplace_back(ctx.assignments[op.tempId()].weight, i); + + /* Don't use the affinity if it might end up creating a waitcnt. */ + if (avoid_heavy_copies && affinity && op.physReg() != affinity->reg && + ctx.assignments[op.tempId()].weight > 0) + affinity.reset(); + } + + /* use affinity if available */ + if (affinity) { + assert(affinity->rc == definition.regClass()); + if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity->reg, -1)) { + definition.setFixed(affinity->reg); register_file.fill(definition); ctx.assignments[definition.tempId()].set(definition); continue; } } - /* by going backwards, we aim to avoid copies in else-blocks */ - for (int i = phi->operands.size() - 1; i >= 0; i--) { - const Operand& op = phi->operands[i]; - if (!op.isTemp() || !op.isFixed()) - continue; + /* If avoid_heavy_copies=false, then this is already sorted how we want it to be. */ + if (avoid_heavy_copies) + std::sort(operands.begin(), operands.end(), std::greater()); + for (auto pair : operands) { + const Operand& op = phi->operands[pair.second]; PhysReg reg = op.physReg(); if (get_reg_specified(ctx, register_file, definition.regClass(), phi, reg, -1)) { definition.setFixed(reg);