From d4c0318f482bc51bb56432239b2ac4d20d66edc6 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sun, 25 Jan 2026 16:24:06 +0100
Subject: [PATCH] aco: apply DPP with scalar src1 on gfx11.5+

Foz-DB Navi48:
Totals from 6261 (7.62% of 82179) affected shaders:
MaxWaves: 176284 -> 176236 (-0.03%); split: +0.01%, -0.03%
Instrs: 5850185 -> 5828451 (-0.37%); split: -0.41%, +0.04%
CodeSize: 31363324 -> 31419904 (+0.18%); split: -0.08%, +0.26%
VGPRs: 328284 -> 328200 (-0.03%); split: -0.07%, +0.05%
SpillSGPRs: 2268 -> 2256 (-0.53%)
Latency: 50235516 -> 50218816 (-0.03%); split: -0.06%, +0.03%
InvThroughput: 8256243 -> 8242036 (-0.17%); split: -0.22%, +0.05%
VClause: 81000 -> 80975 (-0.03%); split: -0.11%, +0.08%
SClause: 136376 -> 136387 (+0.01%); split: -0.11%, +0.11%
Copies: 414021 -> 417894 (+0.94%); split: -0.13%, +1.07%
Branches: 105301 -> 105298 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 291360 -> 291432 (+0.02%)
PreVGPRs: 238593 -> 238729 (+0.06%); split: -0.02%, +0.08%
VALU: 3425446 -> 3403463 (-0.64%); split: -0.65%, +0.01%
SALU: 815505 -> 819372 (+0.47%); split: -0.02%, +0.50%

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_ir.cpp               | 6 +++++-
 src/amd/compiler/aco_optimizer.cpp        | 8 +++++++-
 src/amd/compiler/aco_optimizer_postRA.cpp | 8 +++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index b198378b374..3885f1bb315 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -484,7 +484,8 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
    for (unsigned i = 0; i < instr->operands.size(); i++) {
       if (instr->operands[i].isLiteral())
          return false;
-      if (!instr->operands[i].isOfType(RegType::vgpr) && i < 2)
+      if (!instr->operands[i].isOfType(RegType::vgpr) &&
+          (i == 0 || (i == 1 && gfx_level < GFX11_5)))
          return false;
    }
 
@@ -555,6 +556,9 @@ convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, bool dpp8)
    remove_vop3 &= instr->operands.size() < 3 || !instr->operands[2].isFixed() ||
                  instr->operands[2].isOfType(RegType::vgpr) || instr->operands[2].physReg() == vcc;
 
+   /* scalar src1 needs VOP3. */
+   remove_vop3 &= instr->operands.size() < 2 || instr->operands[1].isOfType(RegType::vgpr);
+
    if (remove_vop3)
       instr->format = withoutVOP3(instr->format);
 
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 388f28bf039..fb844a3db98 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -4982,9 +4982,14 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
          if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods)
             continue;
 
+         Format old_format = instr->format;
          if (i != 0) {
-            if (!can_swap_operands(instr, &instr->opcode, 0, i))
+            if (!instr->operands[0].isOfType(RegType::vgpr) && !instr->isVOP3P())
+               instr->format = asVOP3(instr->format);
+            if (!can_swap_operands(instr, &instr->opcode, 0, i)) {
+               instr->format = old_format;
                continue;
+            }
             instr->valu().swapOperands(0, i);
          }
 
@@ -4993,6 +4998,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                ASSERTED bool success = can_swap_operands(instr, &instr->opcode, 0, i);
                assert(success);
                instr->valu().swapOperands(0, i);
+               instr->format = old_format;
             }
             continue;
          }
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index be6df392aeb..c5e45e0ea0e 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -643,9 +643,14 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
       if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods)
          continue;
 
+      Format old_format = instr->format;
       if (i != 0) {
-         if (!can_swap_operands(instr, &instr->opcode, 0, i))
+         if (!instr->operands[0].isOfType(RegType::vgpr) && !instr->isVOP3P())
+            instr->format = asVOP3(instr->format);
+         if (!can_swap_operands(instr, &instr->opcode, 0, i)) {
+            instr->format = old_format;
             continue;
+         }
          instr->valu().swapOperands(0, i);
       }
 
@@ -654,6 +659,7 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
             ASSERTED bool success = can_swap_operands(instr, &instr->opcode, 0, i);
             assert(success);
             instr->valu().swapOperands(0, i);
+            instr->format = old_format;
          }
          continue;
       }