From 9785fa460c41b9498c24a82b98069655a91224c5 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Mon, 10 Feb 2025 18:45:46 +0100
Subject: [PATCH] nir/opt_algebraic: optimize DXBC boolean bcsel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi21:
Totals from 1749 (2.20% of 79377) affected shaders:
Instrs: 1695408 -> 1685149 (-0.61%); split: -0.68%, +0.07%
CodeSize: 9241312 -> 9174180 (-0.73%); split: -0.79%, +0.06%
VGPRs: 90688 -> 90664 (-0.03%); split: -0.04%, +0.01%
SpillSGPRs: 278 -> 298 (+7.19%)
Latency: 9560167 -> 9540386 (-0.21%); split: -0.29%, +0.08%
InvThroughput: 2236022 -> 2220411 (-0.70%); split: -0.72%, +0.02%
VClause: 29910 -> 29917 (+0.02%)
Copies: 146365 -> 145230 (-0.78%); split: -1.03%, +0.25%
Branches: 59545 -> 59560 (+0.03%)
PreSGPRs: 78858 -> 79242 (+0.49%); split: -0.10%, +0.59%
PreVGPRs: 78643 -> 78560 (-0.11%); split: -0.11%, +0.00%
VALU: 1127861 -> 1113990 (-1.23%); split: -1.24%, +0.01%
SALU: 249535 -> 253237 (+1.48%); split: -0.15%, +1.63%

v2 (idr): Remove a pattern that is now redundant.

v3 (idr): Don't undistribute ineg from bcsel. On platforms where ineg is
a free source modifier, this can be harmful.
Reviewed-by: Ian Romanick
Reviewed-by: Alyssa Rosenzweig
Reviewed-by: Timur Kristóf
Part-of:
---
 src/compiler/nir/nir_opt_algebraic.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index d68e45aa6a1..124060346a2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1460,6 +1460,17 @@ optimizations.extend([
    (('ilt', ('ineg', ('b2i', 'a@1')), 0), a),
    (('iand', ('ineg', ('b2i', a)), 1.0), ('b2f', a)),
    (('iand', ('ineg', ('b2i', a)), 1), ('b2i', a)),
+   (('bcsel', a, ('b2i', 'b@1'), ('b2i', 'c@1')), ('b2i', ('bcsel', a, b, c))),
+   (('bcsel', a, ('b2i', 'b@1'), 0), ('b2i', ('bcsel', a, b, False))),
+   (('bcsel', a, ('b2i', 'b@1'), 1), ('b2i', ('bcsel', a, b, True))),
+   (('bcsel', a, 0, ('b2i', 'b@1')), ('b2i', ('bcsel', a, False, b))),
+   (('bcsel', a, 1, ('b2i', 'b@1')), ('b2i', ('bcsel', a, True, b))),
+
+   (('bcsel', a, ('ineg', ('b2i', 'b@1')), ('ineg', ('b2i', 'c@1'))), ('ineg', ('b2i', ('bcsel', a, b, c)))),
+   (('bcsel', a, ('ineg', ('b2i', 'b@1')), 0), ('ineg', ('b2i', ('bcsel', a, b, False)))),
+   (('bcsel', a, ('ineg', ('b2i', 'b@1')), -1), ('ineg', ('b2i', ('bcsel', a, b, True)))),
+   (('bcsel', a, 0, ('ineg', ('b2i', 'b@1'))), ('ineg', ('b2i', ('bcsel', a, False, b)))),
+   (('bcsel', a, -1, ('ineg', ('b2i', 'b@1'))), ('ineg', ('b2i', ('bcsel', a, True, b)))),
 ])
 
 for op in ('ior', 'iand', 'ixor'):
@@ -2086,7 +2097,6 @@ optimizations.extend([
    # lets iand(b2i1(...), 1) get simplified. Backends can usually fuse iand/inot
    # so this should be no worse when it isn't strictly better.
    (('bcsel', a, 0, ('b2i16', 'b@1')), ('b2i16', ('iand', ('inot', a), b))),
-   (('bcsel', a, ('b2i16', 'b@1'), ('b2i16', 'c@1')), ('b2i16', ('bcsel', a, b, c))),
 
    # Lowered pack followed by lowered unpack, for the high bits
    (('u2u32', ('ushr', ('ior', ('ishl', a, 32), ('u2u64', 'b@8')), 32)), ('u2u32', a)),