From 25add9cbd15355d39967d5f375f4325ca73353a6 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 4 May 2026 16:13:03 +0200 Subject: [PATCH] nir/opt_peephole_select: do not count fmul towards the limit when only used by fadd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi10: Totals from 4077 (5.64% of 72319) affected shaders: MaxWaves: 84057 -> 83325 (-0.87%); split: +0.07%, -0.94% Instrs: 6019711 -> 6007338 (-0.21%); split: -0.27%, +0.07% CodeSize: 32373984 -> 32356152 (-0.06%); split: -0.18%, +0.13% VGPRs: 236588 -> 238172 (+0.67%); split: -0.05%, +0.72% SpillSGPRs: 7341 -> 7367 (+0.35%); split: -0.65%, +1.01% Latency: 61833147 -> 61386674 (-0.72%); split: -0.91%, +0.19% InvThroughput: 22328993 -> 22364077 (+0.16%); split: -0.16%, +0.32% VClause: 97803 -> 97832 (+0.03%); split: -0.29%, +0.32% SClause: 147544 -> 146274 (-0.86%); split: -1.19%, +0.33% Copies: 606083 -> 593887 (-2.01%); split: -2.27%, +0.26% Branches: 171344 -> 164203 (-4.17%); split: -4.17%, +0.00% PreSGPRs: 234116 -> 234922 (+0.34%); split: -0.17%, +0.52% PreVGPRs: 211250 -> 211374 (+0.06%); split: -0.00%, +0.06% VALU: 4130666 -> 4132669 (+0.05%); split: -0.11%, +0.16% SALU: 854007 -> 852585 (-0.17%); split: -0.77%, +0.61% VMEM: 162718 -> 162755 (+0.02%); split: -0.00%, +0.03% SMEM: 237856 -> 236323 (-0.64%); split: -0.65%, +0.00% Reviewed-by: Daniel Schürmann Reviewed-by: Marek Olšák Part-of: --- src/compiler/nir/nir_opt_peephole_select.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 0a5c386fea8..4b6825a542f 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -25,6 +25,7 @@ #include "nir.h" #include "nir_builder.h" #include "nir_control_flow.h" +#include "nir_search_helpers.h" /* * Implements a small peephole optimization that looks for @@ -230,10 
+231,10 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, break; case nir_instr_type_alu: { - nir_alu_instr *mov = nir_instr_as_alu(instr); + nir_alu_instr *alu = nir_instr_as_alu(instr); bool movelike = false; - switch (mov->op) { + switch (alu->op) { case nir_op_mov: case nir_op_fneg: case nir_op_ineg: @@ -280,11 +281,18 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, * merged as a destination modifier or source modifier on some * other instruction. */ - if (mov->op != nir_op_fsat && !movelike) - (*count)++; + if (alu->op != nir_op_fsat && !movelike) { + /* If this is an fmul or fmulz that is only used by fadd, don't count it. + * It will likely be fused to fma/mad. + */ + if ((alu->op != nir_op_fmul && alu->op != nir_op_fmulz) || + !is_only_used_by_fadd(alu)) { + (*count)++; + } + } } else { /* The only uses of this definition must be phis in the successor */ - nir_foreach_use_including_if(use, &mov->def) { + nir_foreach_use_including_if(use, &alu->def) { if (nir_src_is_if(use) || nir_src_use_instr(use)->type != nir_instr_type_phi || nir_src_use_instr(use)->block != block->successors[0])