From 1c1bd9d090eee4d4fe838eb2a65aecf5ff108a71 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sun, 25 Jan 2026 18:43:20 +0100 Subject: [PATCH] aco: only apply DPP with 3 or fewer uses Creating many new DPP instructions increases code size and decreases throughput. Foz-DB Navi48: Totals from 2196 (2.67% of 82179) affected shaders: MaxWaves: 59930 -> 59960 (+0.05%); split: +0.08%, -0.03% Instrs: 3718514 -> 3718298 (-0.01%); split: -0.08%, +0.07% CodeSize: 20593544 -> 20507660 (-0.42%); split: -0.43%, +0.02% VGPRs: 135924 -> 135744 (-0.13%); split: -0.17%, +0.04% Latency: 33174704 -> 33163001 (-0.04%); split: -0.07%, +0.04% InvThroughput: 6500723 -> 6491382 (-0.14%); split: -0.15%, +0.01% VClause: 72348 -> 72343 (-0.01%); split: -0.06%, +0.05% SClause: 83160 -> 83165 (+0.01%); split: -0.03%, +0.04% Copies: 286592 -> 285575 (-0.35%); split: -0.45%, +0.09% Branches: 99970 -> 99971 (+0.00%); split: -0.00%, +0.00% PreSGPRs: 103280 -> 103279 (-0.00%) PreVGPRs: 95590 -> 95440 (-0.16%); split: -0.30%, +0.14% VALU: 1931369 -> 1931725 (+0.02%); split: -0.08%, +0.09% SALU: 637663 -> 636780 (-0.14%); split: -0.15%, +0.01% VOPD: 65236 -> 65589 (+0.54%); split: +0.91%, -0.37% Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 3 +++ src/amd/compiler/aco_optimizer_postRA.cpp | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 2f78631cde8..46746aea4e0 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -4979,6 +4979,9 @@ select_instruction(opt_ctx& ctx, aco_ptr& instr) for (unsigned i = 0; i < input_info.operands.size(); i++) { if (!input_info.operands[i].op.isTemp()) continue; + /* Applying DPP with many uses is unlikely to be profitable. 
*/ + if (ctx.uses[input_info.operands[i].op.tempId()] > 3) + continue; Instruction* parent = ctx.info[input_info.operands[i].op.tempId()].parent_instr; if (!parent->isDPP() || parent->opcode != aco_opcode::v_mov_b32 || diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index c5e45e0ea0e..77d04c6ccd4 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -612,6 +612,10 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr& instr) if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP()) continue; + /* Applying DPP with many uses is unlikely to be profitable. */ + if (ctx.uses[mov->definitions[0].tempId()] > 3) + continue; + /* If we aren't going to remove the v_mov_b32, we have to ensure that it doesn't overwrite * it's own operand before we use it. */