From 4834df82e25aed970ab4fd1f1bd5aef2d29a19cd Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 2 Aug 2023 09:14:18 -0700
Subject: [PATCH] nir/algebraic: More patterns to generate iadd3

I noticed some shaders with patterns similar to these while working on
cooperative matrix lowering.

Meteor Lake and DG2 are the only platforms that support iadd3, so there
were no shader-db or fossil-db changes on any other platforms.

shader-db:

Meteor Lake and DG2 had similar results. (Meteor Lake shown)
total instructions in shared programs: 19869445 -> 19868343 (<.01%)
instructions in affected programs: 419426 -> 418324 (-0.26%)
helped: 913 / HURT: 2

total cycles in shared programs: 936010029 -> 935909811 (-0.01%)
cycles in affected programs: 31746523 -> 31646305 (-0.32%)
helped: 495 / HURT: 356

LOST:   10
GAINED: 12

fossil-db:

Meteor Lake and DG2 had similar results. (Meteor Lake shown)
Totals:
Instrs: 154514596 -> 154505466 (-0.01%); split: -0.01%, +0.00%
Cycle count: 17540226067 -> 17436266198 (-0.59%); split: -0.63%, +0.04%
Spill count: 146887 -> 146886 (-0.00%)
Fill count: 272499 -> 272489 (-0.00%); split: -0.01%, +0.00%
Max live registers: 32634290 -> 32634739 (+0.00%); split: -0.00%, +0.00%
Max dispatch width: 5550128 -> 5550368 (+0.00%)

Totals from 4401 (0.70% of 632560) affected shaders:
Instrs: 3095239 -> 3086109 (-0.29%); split: -0.30%, +0.00%
Cycle count: 7327352564 -> 7223392695 (-1.42%); split: -1.51%, +0.10%
Spill count: 28105 -> 28104 (-0.00%)
Fill count: 45830 -> 45820 (-0.02%); split: -0.04%, +0.02%
Max live registers: 264376 -> 264825 (+0.17%); split: -0.05%, +0.22%
Max dispatch width: 43768 -> 44008 (+0.55%)

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29148>
---
 src/compiler/nir/nir_opt_algebraic.py |  9 +++++++++
 src/compiler/nir/nir_search_helpers.h | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f204da23eff..69edcada0f9 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3099,6 +3099,15 @@ for s in [8, 16, 32, 64]:
       ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
       ((iadd, ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)',  'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
       ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'),  ('iadd3', ('ineg', a), ('ineg', b), c), cond),
+
+      ((iadd, ('ishl', a, 1), 'b(is_not_const)'), ('iadd3', a, a, b), cond),
+      ((iadd, ('ishl', a, 1), '#b(is_16_bits)' ), ('iadd3', a, a, b), cond),
+      ((iadd, ('ineg', ('ishl', a, 1)), 'b(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', a), b), cond),
+      ((iadd, ('ineg', ('ishl', a, 1)), '#b(is_16_bits)' ), ('iadd3', ('ineg', a), ('ineg', a), b), cond),
+
+      # Use special checks to ensure (b+b) or -(b+b) fit in 16 bits.
+      (('ishl@{}'.format(s), ('iadd', a, '#b(is_2x_16_bits)'), 1), ('iadd3', a, a, ('iadd', b, b)), cond),
+      (('ishl@{}'.format(s), ('ineg', ('iadd', a, '#b(is_neg2x_16_bits)')), 1), ('iadd3', ('ineg', a), ('ineg', a), ('ineg', ('iadd', b, b))), cond),
    ])
 
 late_optimizations.extend([
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index c0a68ed3681..6a1740fbae8 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -396,6 +396,24 @@ is_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
    return is_16_bits_with_scale(instr, src, num_components, swizzle, 1);
 }
 
+/** Like is_16_bits, but could 2 times the constant fit in 16 bits? */
+static inline bool
+is_2x_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
+              unsigned src, unsigned num_components,
+              const uint8_t *swizzle)
+{
+   return is_16_bits_with_scale(instr, src, num_components, swizzle, 2);
+}
+
+/** Like is_16_bits, but could -2 times the constant fit in 16 bits? */
+static inline bool
+is_neg2x_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
+                 unsigned src, unsigned num_components,
+                 const uint8_t *swizzle)
+{
+   return is_16_bits_with_scale(instr, src, num_components, swizzle, -2);
+}
+
 static inline bool
 is_not_const(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
              unsigned src, UNUSED unsigned num_components,