From 9e87090db48de50007aeb1ebc58553696cd7a1c0 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 4 May 2026 16:16:05 +0200 Subject: [PATCH] nir/loop_analyze: do not count fmul towards the limit when only used by fadd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As always with loop unrolling, don't look too closely at stats, but they confirm more loops are now unrolled. Foz-DB Navi10: Totals from 66 (0.09% of 72319) affected shaders: MaxWaves: 1464 -> 1424 (-2.73%); split: +0.82%, -3.55% Instrs: 101778 -> 173128 (+70.10%) CodeSize: 544148 -> 905392 (+66.39%) VGPRs: 3652 -> 3788 (+3.72%); split: -0.77%, +4.49% SpillSGPRs: 105 -> 75 (-28.57%) Latency: 1197088 -> 1033471 (-13.67%); split: -17.08%, +3.41% InvThroughput: 315257 -> 293245 (-6.98%); split: -13.29%, +6.31% VClause: 1663 -> 3057 (+83.82%); split: -0.12%, +83.94% SClause: 2797 -> 4496 (+60.74%); split: -0.21%, +60.96% Copies: 6472 -> 11219 (+73.35%); split: -0.08%, +73.42% Branches: 2695 -> 4697 (+74.29%); split: -0.56%, +74.84% PreSGPRs: 3418 -> 3619 (+5.88%); split: -0.79%, +6.67% PreVGPRs: 3305 -> 3423 (+3.57%); split: -1.06%, +4.63% VALU: 73061 -> 124934 (+71.00%) SALU: 11775 -> 20803 (+76.67%); split: -0.99%, +77.66% VMEM: 2729 -> 4627 (+69.55%) SMEM: 3796 -> 5869 (+54.61%); split: -0.18%, +54.79% Reviewed-by: Daniel Schürmann Reviewed-by: Marek Olšák Part-of: --- src/compiler/nir/nir_loop_analyze.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 208dd9fd895..6e72b1fbafb 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -26,6 +26,7 @@ #include "util/ralloc.h" #include "nir.h" #include "nir_constant_expressions.h" +#include "nir_search_helpers.h" typedef struct { /* The loop we store information for */ @@ -97,6 +98,10 @@ instr_cost(loop_info_state *state, nir_instr *instr, } else if (nir_op_is_vec_or_mov(alu->op)) { /* movs and vecs are likely free. */ return 0; + } else if ((alu->op == nir_op_fmul || alu->op == nir_op_fmulz) && + is_only_used_by_fadd(alu)) { + /* If we can fuse fma/mad, do not count the mul. */ + return 0; } if (alu->op == nir_op_flrp) {