agx: optimize b2x(inot)

easier to do as algebraic rules.

total instructions in shared programs: 2915611 -> 2910477 (-0.18%)
instructions in affected programs: 285251 -> 280117 (-1.80%)
helped: 1245
HURT: 5
Instructions are helped.

total bytes in shared programs: 19439752 -> 19400010 (-0.20%)
bytes in affected programs: 1881556 -> 1841814 (-2.11%)
helped: 1244
HURT: 6
Bytes are helped.

total halfregs in shared programs: 519253 -> 519553 (0.06%)
halfregs in affected programs: 5509 -> 5809 (5.45%)
helped: 24
HURT: 107
Halfregs are HURT.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
2026-02-24 22:30:31 +01:00 · 2024-01-19 18:02:00 -04:00 · 2024-01-19 18:02:00 -04:00 · b45d54a388
commit b45d54a388
parent 34ca925064
2 changed files with 13 additions and 19 deletions
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1576,24 +1576,6 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
   case nir_op_bcsel:
      return agx_icmpsel_to(b, dst, s0, i0, s2, s1, AGX_ICOND_UEQ);

-   case nir_op_b2i32:
-   case nir_op_b2i16:
-   case nir_op_b2i8:
-      return agx_icmpsel_to(b, dst, s0, i0, i0, i1, AGX_ICOND_UEQ);
-
-   case nir_op_b2b32:
-      return agx_icmpsel_to(b, dst, s0, i0, i0, agx_mov_imm(b, 32, 0xFFFFFFFF),
-                            AGX_ICOND_UEQ);
-
-   case nir_op_b2f16:
-   case nir_op_b2f32: {
-      /* At this point, boolean is just zero/nonzero, so compare with zero */
-      agx_index f1 = (sz == 16) ? agx_mov_imm(b, 16, _mesa_float_to_half(1.0))
-                                : agx_mov_imm(b, 32, fui(1.0));
-
-      return agx_fcmpsel_to(b, dst, s0, i0, i0, f1, AGX_FCOND_EQ);
-   }
-
   case nir_op_i2i32: {
      if (src_sz == 8) {
         /* Sign extend in software, NIR likes 8-bit conversions */
--- a/src/asahi/compiler/agx_nir_algebraic.py
+++ b/src/asahi/compiler/agx_nir_algebraic.py
@@ -83,6 +83,17 @@ lower_pack = [
      ('isub', 32, 'bits'))),
 ]

+lower_selects = []
+
+for T, sizes, one in [('f', [16, 32], 1.0),
+                      ('i', [8, 16, 32], 1),
+                      ('b', [32], -1)]:
+    for size in sizes:
+        lower_selects.extend([
+            ((f'b2{T}{size}', ('inot', 'a@1')), ('bcsel', a, 0, one)),
+            ((f'b2{T}{size}', 'a@1'), ('bcsel', a, one, 0)),
+        ])
+
 fuse_extr = []
 for start in range(32):
    fuse_extr.extend([
@@ -170,7 +181,8 @@ def run():
    print('#include "agx_nir.h"')

    print(nir_algebraic.AlgebraicPass("agx_nir_lower_algebraic_late",
-                                      lower_sm5_shift + lower_pack).render())
+                                      lower_sm5_shift + lower_pack +
+                                      lower_selects).render())
    print(nir_algebraic.AlgebraicPass("agx_nir_fuse_algebraic_late",
                                      fuse_extr + fuse_ubfe + fuse_imad).render())
    print(nir_algebraic.AlgebraicPass("agx_nir_opt_ixor_bcsel",