From 4834df82e25aed970ab4fd1f1bd5aef2d29a19cd Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 2 Aug 2023 09:14:18 -0700 Subject: [PATCH] nir/algebraic: More patterns to generate iadd3 I noticed some shaders with patterns similar to these while working on cooperative matrix lowering. Meteor Lake and DG2 are the only platforms that support iadd3, so there were no shader-db or fossil-db changes on any other platforms. shader-db: Meteor Lake and DG2 had similar results. (Meteor Lake shown) total instructions in shared programs: 19869445 -> 19868343 (<.01%) instructions in affected programs: 419426 -> 418324 (-0.26%) helped: 913 / HURT: 2 total cycles in shared programs: 936010029 -> 935909811 (-0.01%) cycles in affected programs: 31746523 -> 31646305 (-0.32%) helped: 495 / HURT: 356 LOST: 10 GAINED: 12 fossil-db: Meteor Lake and DG2 had similar results. (Meteor Lake shown) Totals: Instrs: 154514596 -> 154505466 (-0.01%); split: -0.01%, +0.00% Cycle count: 17540226067 -> 17436266198 (-0.59%); split: -0.63%, +0.04% Spill count: 146887 -> 146886 (-0.00%) Fill count: 272499 -> 272489 (-0.00%); split: -0.01%, +0.00% Max live registers: 32634290 -> 32634739 (+0.00%); split: -0.00%, +0.00% Max dispatch width: 5550128 -> 5550368 (+0.00%) Totals from 4401 (0.70% of 632560) affected shaders: Instrs: 3095239 -> 3086109 (-0.29%); split: -0.30%, +0.00% Cycle count: 7327352564 -> 7223392695 (-1.42%); split: -1.51%, +0.10% Spill count: 28105 -> 28104 (-0.00%) Fill count: 45830 -> 45820 (-0.02%); split: -0.04%, +0.02% Max live registers: 264376 -> 264825 (+0.17%); split: -0.05%, +0.22% Max dispatch width: 43768 -> 44008 (+0.55%) Reviewed-by: Jordan Justen Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 9 +++++++++ src/compiler/nir/nir_search_helpers.h | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f204da23eff..69edcada0f9 100644 --- 
a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -3099,6 +3099,15 @@ for s in [8, 16, 32, 64]: ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond), ((iadd, ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond), ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond), + + ((iadd, ('ishl', a, 1), 'b(is_not_const)'), ('iadd3', a, a, b), cond), + ((iadd, ('ishl', a, 1), '#b(is_16_bits)' ), ('iadd3', a, a, b), cond), + ((iadd, ('ineg', ('ishl', a, 1)), 'b(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', a), b), cond), + ((iadd, ('ineg', ('ishl', a, 1)), '#b(is_16_bits)' ), ('iadd3', ('ineg', a), ('ineg', a), b), cond), + + # Use special checks to ensure (b+b) or -(b+b) fit in 16 bits. + (('ishl@{}'.format(s), ('iadd', a, '#b(is_2x_16_bits)'), 1), ('iadd3', a, a, ('iadd', b, b)), cond), + (('ishl@{}'.format(s), ('ineg', ('iadd', a, '#b(is_neg2x_16_bits)')), 1), ('iadd3', ('ineg', a), ('ineg', a), ('ineg', ('iadd', b, b))), cond), ]) late_optimizations.extend([ diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index c0a68ed3681..6a1740fbae8 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -396,6 +396,24 @@ is_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr, return is_16_bits_with_scale(instr, src, num_components, swizzle, 1); } +/** Like is_16_bits, but checks whether 2 times the constant fits in 16 bits. 
*/ +static inline bool +is_2x_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr, + unsigned src, unsigned num_components, + const uint8_t *swizzle) +{ + return is_16_bits_with_scale(instr, src, num_components, swizzle, 2); +} + +/** Like is_16_bits, but checks whether -2 times the constant fits in 16 bits. */ +static inline bool +is_neg2x_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr, + unsigned src, unsigned num_components, + const uint8_t *swizzle) +{ + return is_16_bits_with_scale(instr, src, num_components, swizzle, -2); +} + static inline bool is_not_const(UNUSED struct hash_table *ht, const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,