aco/ra: prefer phi operands which don't create waitcnt

fossil-db (navi31):
Totals from 88 (0.11% of 79825) affected shaders:
Instrs: 338969 -> 338866 (-0.03%); split: -0.11%, +0.08%
CodeSize: 1769436 -> 1769032 (-0.02%); split: -0.10%, +0.08%
Latency: 2616059 -> 2616218 (+0.01%); split: -0.02%, +0.02%
InvThroughput: 513410 -> 513400 (-0.00%); split: -0.01%, +0.01%
SClause: 9081 -> 9083 (+0.02%); split: -0.03%, +0.06%
Copies: 38796 -> 38716 (-0.21%); split: -0.86%, +0.66%
Branches: 10726 -> 10734 (+0.07%); split: -0.05%, +0.12%
SALU: 48527 -> 48447 (-0.16%); split: -0.69%, +0.53%

fossil-db (navi21):
Totals from 34492 (43.21% of 79825) affected shaders:
Instrs: 22913622 -> 22864147 (-0.22%); split: -0.25%, +0.04%
CodeSize: 120028404 -> 119838516 (-0.16%); split: -0.19%, +0.03%
VGPRs: 1392656 -> 1393248 (+0.04%); split: -0.07%, +0.11%
Latency: 100282167 -> 100021714 (-0.26%); split: -0.57%, +0.31%
InvThroughput: 22752202 -> 22818933 (+0.29%); split: -0.12%, +0.41%
VClause: 420148 -> 420069 (-0.02%); split: -0.29%, +0.27%
SClause: 447545 -> 447557 (+0.00%)
Copies: 2243057 -> 2243914 (+0.04%); split: -0.43%, +0.46%
Branches: 724887 -> 725208 (+0.04%); split: -0.02%, +0.07%
VALU: 13235472 -> 13236299 (+0.01%); split: -0.07%, +0.08%
SALU: 4072375 -> 4073031 (+0.02%); split: -0.02%, +0.03%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
This commit is contained in:
Rhys Perry 2025-10-28 16:43:53 +00:00
parent db653957ba
commit 52a8ecffa8

View file

@ -2693,25 +2693,48 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file,
if (definition.isFixed())
continue;
/* use affinity if available */
/* Preferring the more expensive to copy operands doesn't do much for logical phis on GFX11+
* because it creates a waitcnt anyway. */
bool avoid_heavy_copies =
ctx.program->gfx_level < GFX11 || phi->opcode == aco_opcode::p_linear_phi;
std::optional<assignment> affinity;
if (ctx.assignments[definition.tempId()].affinity &&
ctx.assignments[ctx.assignments[definition.tempId()].affinity].assigned) {
assignment& affinity = ctx.assignments[ctx.assignments[definition.tempId()].affinity];
assert(affinity.rc == definition.regClass());
if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity.reg, -1)) {
definition.setFixed(affinity.reg);
affinity.emplace(ctx.assignments[ctx.assignments[definition.tempId()].affinity]);
}
small_vec<std::pair<unsigned, unsigned>, 4> operands;
/* by going backwards, we aim to avoid copies in else-blocks */
for (int i = phi->operands.size() - 1; i >= 0; i--) {
const Operand& op = phi->operands[i];
if (!op.isTemp() || !op.isFixed())
continue;
operands.emplace_back(ctx.assignments[op.tempId()].weight, i);
/* Don't use the affinity if it might end up creating a waitcnt. */
if (avoid_heavy_copies && affinity && op.physReg() != affinity->reg &&
ctx.assignments[op.tempId()].weight > 0)
affinity.reset();
}
/* use affinity if available */
if (affinity) {
assert(affinity->rc == definition.regClass());
if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity->reg, -1)) {
definition.setFixed(affinity->reg);
register_file.fill(definition);
ctx.assignments[definition.tempId()].set(definition);
continue;
}
}
/* by going backwards, we aim to avoid copies in else-blocks */
for (int i = phi->operands.size() - 1; i >= 0; i--) {
const Operand& op = phi->operands[i];
if (!op.isTemp() || !op.isFixed())
continue;
/* If avoid_heavy_copies=false, then this is already sorted how we want it to be. */
if (avoid_heavy_copies)
std::sort(operands.begin(), operands.end(), std::greater());
for (auto pair : operands) {
const Operand& op = phi->operands[pair.second];
PhysReg reg = op.physReg();
if (get_reg_specified(ctx, register_file, definition.regClass(), phi, reg, -1)) {
definition.setFixed(reg);