From 4e2f1345d8a3544d3d0e51c6155df4cac657d9a7 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sun, 1 Feb 2026 18:02:20 +0100
Subject: [PATCH] nir/opt_algebraic: make fcmp(a+b, 0.0) -> fcmp(a, -b) exact using ninf

And remove some cases that never happen because we remove fneg on
compare with constants.

Foz-DB Navi48:
Totals from 1305 (1.58% of 82405) affected shaders:
MaxWaves: 32872 -> 32854 (-0.05%)
Instrs: 4554013 -> 4551638 (-0.05%); split: -0.06%, +0.01%
CodeSize: 25269108 -> 25255428 (-0.05%); split: -0.06%, +0.00%
VGPRs: 87660 -> 87732 (+0.08%)
Latency: 33291152 -> 33285023 (-0.02%); split: -0.03%, +0.01%
InvThroughput: 8965288 -> 8963071 (-0.02%); split: -0.03%, +0.00%
VClause: 104008 -> 103947 (-0.06%); split: -0.09%, +0.03%
SClause: 97577 -> 97574 (-0.00%); split: -0.01%, +0.00%
Copies: 372741 -> 372628 (-0.03%); split: -0.05%, +0.02%
Branches: 134076 -> 134072 (-0.00%)
PreSGPRs: 65109 -> 65110 (+0.00%); split: -0.00%, +0.00%
PreVGPRs: 68911 -> 68968 (+0.08%); split: -0.01%, +0.10%
VALU: 2247091 -> 2245815 (-0.06%); split: -0.07%, +0.01%
SALU: 810190 -> 810001 (-0.02%); split: -0.02%, +0.00%
VOPD: 205075 -> 205016 (-0.03%); split: +0.04%, -0.07%

Reviewed-by: Rhys Perry
Part-of:
---
 src/compiler/nir/nir_opt_algebraic.py | 28 +++++++++------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 59db4c7fb70..56e45dbc8f3 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3613,34 +3613,24 @@ late_optimizations = [
    #              a=Inf, b=-Inf  a=-Inf, b=Inf  a=NaN  b=NaN
    # (a+b) < 0    false          false          false  false
    # a < -b       false          false          false  false
-   # -(a+b) < 0   false          false          false  false
-   # -a < b       false          false          false  false
    # (a+b) >= 0   false          false          false  false
    # a >= -b      true           true           false  false
-   # -(a+b) >= 0  false          false          false  false
-   # -a >= b      true           true           false  false
    # (a+b) == 0   false          false          false  false
    # a == -b      true           true           false  false
    # (a+b) != 0   true           true           true   true
    # a != -b      false          false          true   true
-   (('flt', ('fadd(is_used_once)', a, b), 0.0), ('flt', a, ('fneg', b))),
-   (('flt', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b)), 0.0), ('flt', ('fneg', a), b)),
-   (('flt', 0.0, ('fadd(is_used_once)', a, b) ), ('flt', ('fneg', a), b)),
-   (('flt', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('flt', a, ('fneg', b))),
-   (('~fge', ('fadd(is_used_once)', a, b), 0.0), ('fge', a, ('fneg', b))),
-   (('~fge', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b)), 0.0), ('fge', ('fneg', a), b)),
-   (('~fge', 0.0, ('fadd(is_used_once)', a, b) ), ('fge', ('fneg', a), b)),
-   (('~fge', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('fge', a, ('fneg', b))),
-   (('~feq', ('fadd(is_used_once)', a, b), 0.0), ('feq', a, ('fneg', b))),
-   (('~fneu', ('fadd(is_used_once)', a, b), 0.0), ('fneu', a, ('fneg', b))),
+   (('flt', ('fadd(is_used_once)', a, b), 0.0), ('flt', a, ('fneg', b))),
+   (('flt', 0.0, ('fadd(is_used_once)', a, b) ), ('flt', ('fneg', a), b)),
+   (('fge', ('fadd(is_used_once,ninf)', a, b), 0.0), ('fge', a, ('fneg', b))),
+   (('fge', 0.0, ('fadd(is_used_once,ninf)', a, b) ), ('fge', ('fneg', a), b)),
+   (('feq', ('fadd(is_used_once,ninf)', a, b), 0.0), ('feq', a, ('fneg', b))),
+   (('fneu', ('fadd(is_used_once,ninf)', a, b), 0.0), ('fneu', a, ('fneg', b))),
 
    # If either source must be finite, then the original (a+b) cannot produce
    # NaN due to Inf-Inf. The patterns and the replacements produce the same
    # result if b is NaN. Therefore, the replacements are exact.
-   (('fge', ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('fge', a, ('fneg', b))),
-   (('fge', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_finite)', b)), 0.0), ('fge', ('fneg', a), b)),
-   (('fge', 0.0, ('fadd(is_used_once)', 'a(is_finite)', b) ), ('fge', ('fneg', a), b)),
-   (('fge', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_finite)', b))), ('fge', a, ('fneg', b))),
+   (('fge', ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('fge', a, ('fneg', b))),
+   (('fge', 0.0, ('fadd(is_used_once)', 'a(is_finite)', b) ), ('fge', ('fneg', a), b)),
    (('feq', ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('feq', a, ('fneg', b))),
    (('fneu', ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('fneu', a, ('fneg', b))),
 
@@ -3825,7 +3815,7 @@ late_optimizations.extend([
 
    (('fge', ('fsat(is_used_once)', a), 1.0), ('fge', a, 1.0)),
 
-   (('~fge', ('fmin(is_used_once)', ('fadd(is_used_once)', a, b), ('fadd', c, d)), 0.0), ('iand', ('fge', a, ('fneg', b)), ('fge', c, ('fneg', d)))),
+   (('fge', ('fmin(is_used_once,nnan)', ('fadd(is_used_once)', a, b), ('fadd', c, d)), 0.0), ('iand', ('fge', a, ('fneg', b)), ('fge', c, ('fneg', d)))),
 
    (('flt', ('fneg', a), ('fneg', b)), ('flt', b, a)),
    (('fge', ('fneg', a), ('fneg', b)), ('fge', b, a)),