nir/opt_algebraic: add various unordered/ordered patterns from aco

Foz-DB Navi21:
Totals from 6747 (8.50% of 79395) affected shaders:
MaxWaves: 134646 -> 134642 (-0.00%)
Instrs: 7830299 -> 7828851 (-0.02%); split: -0.03%, +0.01%
CodeSize: 43045532 -> 43010260 (-0.08%); split: -0.09%, +0.00%
VGPRs: 378960 -> 378968 (+0.00%)
SpillSGPRs: 1209 -> 1208 (-0.08%)
Latency: 74667977 -> 74670405 (+0.00%); split: -0.02%, +0.02%
InvThroughput: 20124981 -> 20124768 (-0.00%); split: -0.02%, +0.02%
VClause: 162870 -> 162868 (-0.00%); split: -0.00%, +0.00%
SClause: 277280 -> 277315 (+0.01%); split: -0.00%, +0.02%
Copies: 528627 -> 528667 (+0.01%); split: -0.00%, +0.01%
PreSGPRs: 319526 -> 319508 (-0.01%)
PreVGPRs: 334264 -> 334265 (+0.00%); split: -0.00%, +0.00%
VALU: 5485412 -> 5485408 (-0.00%); split: -0.02%, +0.02%
SALU: 743882 -> 742301 (-0.21%); split: -0.21%, +0.00%

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29467>
This commit is contained in:
Georg Lehmann 2024-05-29 16:12:44 +02:00 committed by Marge Bot
parent 434dfb51ca
commit 3e86d2452f

View file

@ -146,6 +146,13 @@ def add_fabs_fneg(pattern, replacements, commutative = True):
optimizations = [
# These will be recreated by late_algebraic if supported.
# Lowering here means we don't have to duplicate all other optimization patterns.
(('fgeu', a, b), ('inot', ('flt', a, b))),
(('fltu', a, b), ('inot', ('fge', a, b))),
(('fneo', 0.0, a), ('flt', 0.0, ('fabs', a))),
(('fequ', 0.0, a), ('inot', ('flt', 0.0, ('fabs', a)))),
(('imul', a, '#b(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
(('imul', 'a@8', 0x80), ('ishl', a, 7), '!options->lower_bitops'),
@ -3091,13 +3098,47 @@ late_optimizations = [
# This is how SpvOpFOrdNotEqual might be implemented. Replace it with
# SpvOpLessOrGreater.
(('iand', ('fneu', a, b), ('iand', ('feq', a, a), ('feq', b, b))), ('ior', ('!flt', a, b), ('!flt', b, a))),
(('iand', ('fneu', a, 0.0), ('feq', a, a) ), ('!flt', 0.0, ('fabs', a))),
*add_fabs_fneg((('iand', ('fneu', 'ma', 'mb'), ('iand', ('feq', a, a), ('feq', b, b))), ('ior', ('!flt', 'ma', 'mb'), ('!flt', 'mb', 'ma'))), {'ma' : a, 'mb' : b}),
(('iand', ('fneu', a, 0.0), ('feq', a, a)), ('!flt', 0.0, ('fabs', a))),
# This is how SpvOpFUnordEqual might be implemented. Replace it with
# !SpvOpLessOrGreater.
(('ior', ('feq', a, b), ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('ior', ('!flt', a, b), ('!flt', b, a)))),
(('ior', ('feq', a, 0.0), ('fneu', a, a), ), ('inot', ('!flt', 0.0, ('fabs', a)))),
*add_fabs_fneg((('ior', ('feq', 'ma', 'mb'), ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('ior', ('!flt', 'ma', 'mb'), ('!flt', 'mb', 'ma')))), {'ma' : a, 'mb' : b}),
(('ior', ('feq', a, 0.0), ('fneu', a, a)), ('inot', ('!flt', 0.0, ('fabs', a)))),
*add_fabs_fneg((('ior', ('flt', 'ma', 'mb'), ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('fge', 'ma', 'mb'))), {'ma' : a, 'mb' : b}, False),
*add_fabs_fneg((('ior', ('fge', 'ma', 'mb'), ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('flt', 'ma', 'mb'))), {'ma' : a, 'mb' : b}, False),
*add_fabs_fneg((('ior', ('flt', 'ma', 'b(is_a_number)'), ('fneu', a, a)), ('inot', ('fge', 'ma', b))), {'ma' : a}),
*add_fabs_fneg((('ior', ('fge', 'ma', 'b(is_a_number)'), ('fneu', a, a)), ('inot', ('flt', 'ma', b))), {'ma' : a}),
*add_fabs_fneg((('ior', ('flt', 'a(is_a_number)', 'mb'), ('fneu', b, b)), ('inot', ('fge', a, 'mb'))), {'mb' : b}),
*add_fabs_fneg((('ior', ('fge', 'a(is_a_number)', 'mb'), ('fneu', b, b)), ('inot', ('flt', a, 'mb'))), {'mb' : b}),
*add_fabs_fneg((('iand', ('fneu', 'ma', 'b(is_a_number)'), ('feq', a, a)), ('fneo', 'ma', b), 'options->has_fneo_fcmpu'), {'ma' : a}),
*add_fabs_fneg((('ior', ('feq', 'ma', 'b(is_a_number)'), ('fneu', a, a)), ('fequ', 'ma', b), 'options->has_fneo_fcmpu'), {'ma' : a}),
(('ior', ('flt', a, b), ('flt', b, a)), ('fneo', a, b), 'options->has_fneo_fcmpu'),
(('flt', 0.0, ('fabs', a)), ('fneo', 0.0, a), 'options->has_fneo_fcmpu'),
# These don't interfere with the previous optimizations that include these
# expressions in their search expressions, because nir_algebraic_impl visits
# instructions in reverse order.
(('ior', ('fneu', 'a@16', a), ('fneu', 'b@16', b)), ('funord', a, b), 'options->has_ford_funord'),
(('iand', ('feq', 'a@16', a), ('feq', 'b@16', b)), ('ford', a, b), 'options->has_ford_funord'),
(('ior', ('fneu', 'a@32', a), ('fneu', 'b@32', b)), ('funord', a, b), 'options->has_ford_funord'),
(('iand', ('feq', 'a@32', a), ('feq', 'b@32', b)), ('ford', a, b), 'options->has_ford_funord'),
(('ior', ('fneu', 'a@64', a), ('fneu', 'b@64', b)), ('funord', a, b), 'options->has_ford_funord'),
(('iand', ('feq', 'a@64', a), ('feq', 'b@64', b)), ('ford', a, b), 'options->has_ford_funord'),
(('inot', ('ford(is_used_once)', a, b)), ('funord', a, b)),
(('inot', ('funord(is_used_once)', a, b)), ('ford', a, b)),
(('inot', ('feq(is_used_once)', a, b)), ('fneu', a, b)),
(('inot', ('fneu(is_used_once)', a, b)), ('feq', a, b)),
(('inot', ('fequ(is_used_once)', a, b)), ('fneo', a, b)),
(('inot', ('fneo(is_used_once)', a, b)), ('fequ', a, b)),
(('inot', ('flt(is_used_once)', a, b)), ('fgeu', a, b), 'options->has_fneo_fcmpu'),
(('inot', ('fgeu(is_used_once)', a, b)), ('flt', a, b)),
(('inot', ('fge(is_used_once)', a, b)), ('fltu', a, b), 'options->has_fneo_fcmpu'),
(('inot', ('fltu(is_used_once)', a, b)), ('fge', a, b)),
# nir_lower_to_source_mods will collapse this, but its existence during the
# optimization loop can prevent other optimizations.