From 9785fa460c41b9498c24a82b98069655a91224c5 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Mon, 10 Feb 2025 18:45:46 +0100
Subject: [PATCH] nir/opt_algebraic: optimize DXBC boolean bcsel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi21:
Totals from 1749 (2.20% of 79377) affected shaders:
Instrs: 1695408 -> 1685149 (-0.61%); split: -0.68%, +0.07%
CodeSize: 9241312 -> 9174180 (-0.73%); split: -0.79%, +0.06%
VGPRs: 90688 -> 90664 (-0.03%); split: -0.04%, +0.01%
SpillSGPRs: 278 -> 298 (+7.19%)
Latency: 9560167 -> 9540386 (-0.21%); split: -0.29%, +0.08%
InvThroughput: 2236022 -> 2220411 (-0.70%); split: -0.72%, +0.02%
VClause: 29910 -> 29917 (+0.02%)
Copies: 146365 -> 145230 (-0.78%); split: -1.03%, +0.25%
Branches: 59545 -> 59560 (+0.03%)
PreSGPRs: 78858 -> 79242 (+0.49%); split: -0.10%, +0.59%
PreVGPRs: 78643 -> 78560 (-0.11%); split: -0.11%, +0.00%
VALU: 1127861 -> 1113990 (-1.23%); split: -1.24%, +0.01%
SALU: 249535 -> 253237 (+1.48%); split: -0.15%, +1.63%

v2 (idr): Remove a pattern that is now redundant.

v3 (idr): Don't undistribute ineg from bcsel. On platforms where ineg is
a free source modifier, this can be harmful.
Reviewed-by: Ian Romanick
Reviewed-by: Alyssa Rosenzweig
Reviewed-by: Timur Kristóf
Part-of:
---
 src/compiler/nir/nir_opt_algebraic.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index d68e45aa6a1..124060346a2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1460,6 +1460,17 @@ optimizations.extend([
    (('ilt', ('ineg', ('b2i', 'a@1')), 0), a),
    (('iand', ('ineg', ('b2i', a)), 1.0), ('b2f', a)),
    (('iand', ('ineg', ('b2i', a)), 1), ('b2i', a)),
+   (('bcsel', a, ('b2i', 'b@1'), ('b2i', 'c@1')), ('b2i', ('bcsel', a, b, c))),
+   (('bcsel', a, ('b2i', 'b@1'), 0), ('b2i', ('bcsel', a, b, False))),
+   (('bcsel', a, ('b2i', 'b@1'), 1), ('b2i', ('bcsel', a, b, True))),
+   (('bcsel', a, 0, ('b2i', 'b@1')), ('b2i', ('bcsel', a, False, b))),
+   (('bcsel', a, 1, ('b2i', 'b@1')), ('b2i', ('bcsel', a, True, b))),
+
+   (('bcsel', a, ('ineg', ('b2i', 'b@1')), ('ineg', ('b2i', 'c@1'))), ('ineg', ('b2i', ('bcsel', a, b, c)))),
+   (('bcsel', a, ('ineg', ('b2i', 'b@1')), 0), ('ineg', ('b2i', ('bcsel', a, b, False)))),
+   (('bcsel', a, ('ineg', ('b2i', 'b@1')), -1), ('ineg', ('b2i', ('bcsel', a, b, True)))),
+   (('bcsel', a, 0, ('ineg', ('b2i', 'b@1'))), ('ineg', ('b2i', ('bcsel', a, False, b)))),
+   (('bcsel', a, -1, ('ineg', ('b2i', 'b@1'))), ('ineg', ('b2i', ('bcsel', a, True, b)))),
 ])
 
 for op in ('ior', 'iand', 'ixor'):
@@ -2086,7 +2097,6 @@ optimizations.extend([
    # lets iand(b2i1(...), 1) get simplified. Backends can usually fuse iand/inot
    # so this should be no worse when it isn't strictly better.
    (('bcsel', a, 0, ('b2i16', 'b@1')), ('b2i16', ('iand', ('inot', a), b))),
-   (('bcsel', a, ('b2i16', 'b@1'), ('b2i16', 'c@1')), ('b2i16', ('bcsel', a, b, c))),
 
    # Lowered pack followed by lowered unpack, for the high bits
    (('u2u32', ('ushr', ('ior', ('ishl', a, 32), ('u2u64', 'b@8')), 32)), ('u2u32', a)),