nir/opt_algebraic: use nan/inf/sz preserve flags instead of exact for cmp/min/max replacement

And remove some, because they should be covered by the search pattern anyway.

Foz-DB Navi48:
Totals from 560 (0.68% of 82405) affected shaders:
MaxWaves: 11279 -> 11291 (+0.11%)
Instrs: 5214229 -> 5214386 (+0.00%); split: -0.02%, +0.02%
CodeSize: 29613884 -> 29616740 (+0.01%); split: -0.01%, +0.02%
VGPRs: 50400 -> 50328 (-0.14%)
Latency: 36481700 -> 36481157 (-0.00%); split: -0.01%, +0.01%
InvThroughput: 7309905 -> 7307905 (-0.03%); split: -0.05%, +0.02%
VClause: 131423 -> 131424 (+0.00%); split: -0.00%, +0.00%
SClause: 111485 -> 111499 (+0.01%); split: -0.00%, +0.01%
Copies: 441899 -> 442029 (+0.03%); split: -0.02%, +0.05%
Branches: 165599 -> 165597 (-0.00%)
PreVGPRs: 43558 -> 43525 (-0.08%)
VALU: 2573609 -> 2573324 (-0.01%); split: -0.03%, +0.02%
SALU: 851172 -> 851271 (+0.01%); split: -0.01%, +0.02%
VOPD: 366409 -> 366934 (+0.14%); split: +0.23%, -0.08%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39641>
This commit is contained in:
Georg Lehmann 2026-01-31 23:17:20 +01:00 committed by Marge Bot
parent a8ad72b912
commit b678899ef8

View file

@ -788,8 +788,8 @@ optimizations.extend([
(('bcsel(is_only_used_as_float)', ('feq', a, 'b(is_not_zero)'), b, a), a),
(('bcsel(is_only_used_as_float)', ('fneu', a, 'b(is_not_zero)'), a, b), a),
(('bcsel', ('feq(ignore_exact)', a, 0), 0, ('fsat', ('fmul', a, 'b(is_a_number)'))), ('!fsat', ('fmul', a, b))),
(('bcsel', ('fneu(ignore_exact)', a, 0), ('fsat', ('fmul', a, 'b(is_a_number)')), 0), ('!fsat', ('fmul', a, b))),
(('bcsel', ('feq(ignore_exact)', a, 0), 0, ('fsat', ('fmul', a, 'b(is_a_number)'))), ('fsat(preserve_sz)', ('fmul', a, b))),
(('bcsel', ('fneu(ignore_exact)', a, 0), ('fsat', ('fmul', a, 'b(is_a_number)')), 0), ('fsat(preserve_sz)', ('fmul', a, b))),
(('bcsel', ('feq(ignore_exact)', a, 0), b, ('fadd', a, 'b(is_not_zero)')), ('fadd', a, b)),
(('bcsel', ('fneu(ignore_exact)', a, 0), ('fadd', a, 'b(is_not_zero)'), b), ('fadd', a, b)),
@ -1054,7 +1054,7 @@ optimizations.extend([
(('~fmax', ('fmin', a, 0.0), -1.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_fsat'),
# fsat(fsign(NaN)) = fsat(0) = 0, and b2f(0 < NaN) = b2f(False) = 0. Mark
# the new comparison precise to prevent it being changed to 'a != 0'.
(('fsat', ('fsign', a)), ('b2f', ('!flt', 0.0, a))),
(('fsat', ('fsign', a)), ('b2f', ('flt(preserve_nan_inf)', 0.0, a))),
(('fsat', ('b2f', a)), ('b2f', a)),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
@ -1109,17 +1109,16 @@ optimizations.extend([
# The ior versions are exact because fmin and fmax will always pick a
# non-NaN value, if one exists. Therefore (a < NaN) || (a < c) == a <
# fmax(NaN, c) == a < c. Mark the fmin or fmax in the replacement as exact
# to prevent other optimizations from ruining the "NaN clensing" property
# of the fmin or fmax.
(('ior', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('!fmax', b, c))),
(('ior', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('!fmin', a, b), c)),
(('ior', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('!fmin', b, c))),
(('ior', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('!fmax', a, b), c)),
(('ior', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('!fmax', b, c))),
(('ior', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('!fmin', a, b), c)),
(('ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('!fmin', b, c))),
(('ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('!fmax', a, b), c)),
# fmax(NaN, c) == a < c. If the source comparisons were NaN preserving,
# so should be the replacement, which prevents further optimizations
(('ior', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
(('ior', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
(('ior', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
(('ior', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
(('ior', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('fmax', b, c))),
(('ior', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('fmin', a, b), c)),
(('ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmin', b, c))),
(('ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmax', a, b), c)),
(('~iand', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmin', b, c))),
(('~iand', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmax', a, b), c)),
(('~iand', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmax', b, c))),
@ -1138,10 +1137,10 @@ optimizations.extend([
# single step. Doing just the replacement can lead to an infinite loop as
# the pattern is repeatedly applied to the result of the previous
# application of the pattern.
(('ior', ('ior(is_used_once)', ('flt(is_used_once)', a, c), d), ('flt', b, c)), ('ior', ('flt', ('!fmin', a, b), c), d)),
(('ior', ('ior(is_used_once)', ('flt', a, c), d), ('flt(is_used_once)', b, c)), ('ior', ('flt', ('!fmin', a, b), c), d)),
(('ior', ('ior(is_used_once)', ('flt(is_used_once)', a, b), d), ('flt', a, c)), ('ior', ('flt', a, ('!fmax', b, c)), d)),
(('ior', ('ior(is_used_once)', ('flt', a, b), d), ('flt(is_used_once)', a, c)), ('ior', ('flt', a, ('!fmax', b, c)), d)),
(('ior', ('ior(is_used_once)', ('flt(is_used_once)', a, c), d), ('flt', b, c)), ('ior', ('flt', ('fmin', a, b), c), d)),
(('ior', ('ior(is_used_once)', ('flt', a, c), d), ('flt(is_used_once)', b, c)), ('ior', ('flt', ('fmin', a, b), c), d)),
(('ior', ('ior(is_used_once)', ('flt(is_used_once)', a, b), d), ('flt', a, c)), ('ior', ('flt', a, ('fmax', b, c)), d)),
(('ior', ('ior(is_used_once)', ('flt', a, b), d), ('flt(is_used_once)', a, c)), ('ior', ('flt', a, ('fmax', b, c)), d)),
# This is how SpvOpFOrdNotEqual might be implemented. If both values are
# numbers, then it can be replaced with fneu.
@ -1192,14 +1191,10 @@ for s in [16, 32, 64]:
# For all other values of 'a', the original and replacement behave as
# copysign.
#
# Marking the replacement comparisons as precise prevents any future
# optimizations from replacing either of the comparisons with the
# logical-not of the other.
#
# Note: Use b2i32 in the replacement because some platforms that
# support fp16 don't support int16.
(('bcsel@{}'.format(s), ('feq', a, 0.0), 1.0, ('i2f{}'.format(s), ('iadd', ('b2i{}'.format(s), ('flt', 0.0, 'a@{}'.format(s))), ('ineg', ('b2i{}'.format(s), ('flt', 'a@{}'.format(s), 0.0)))))),
('i2f{}'.format(s), ('iadd', ('b2i32', ('!fge', a, 0.0)), ('ineg', ('b2i32', ('!flt', a, 0.0)))))),
('i2f{}'.format(s), ('iadd', ('b2i32', ('fge', a, 0.0)), ('ineg', ('b2i32', ('flt', a, 0.0)))))),
# Signed pow() used in Control. It's not enough to match just the
# copysign piece because we would require extra instructions to handle
@ -1209,7 +1204,7 @@ for s in [16, 32, 64]:
('i2f', ('iadd', ('b2i', ('flt', 0.0, a)),
('ineg', ('b2i', ('flt', a, 0.0)))))),
('bcsel', ('!flt', a, 0.0),
('bcsel', ('flt', a, 0.0),
('fneg', ('fexp2', ('fmul', ('flog2', ('fabs', a)), b))),
('fexp2', ('fmul', ('flog2', ('fabs', a)), b))), 'true', TestStatus.XFAIL), # XFAIL is that a=0.0, b=-1.0 ends up producing inf instead of NaN (thanks to eliding an fmul(inf, 0.0))
@ -1550,7 +1545,7 @@ optimizations.extend([
# Vulkan allows us to use any rounding mode, so choose rtz because it's simple.
# Avoid some NaNs being converted to Inf if the lsb are cut off.
(('f2bf', a), ('bcsel', ('!fneu', a, a), -1, ('unpack_32_2x16_split_y', a)), 'options->lower_bfloat16_conversions', TestStatus.UNSUPPORTED), # all test inputs skipped
(('f2bf', a), ('bcsel', ('fneu(preserve_nan_inf)', a, a), -1, ('unpack_32_2x16_split_y', a)), 'options->lower_bfloat16_conversions', TestStatus.UNSUPPORTED), # all test inputs skipped
(('bf2f', a), ('pack_32_2x16', ('vec2', 0, a)), 'options->lower_bfloat16_conversions'),
])
@ -2765,8 +2760,8 @@ optimizations.extend([
# float(0 < NaN) - float(NaN < 0) = float(False) - float(False) = 0 - 0 = 0
# Mark the new comparisons precise to prevent them being changed to 'a !=
# 0' or 'a == 0'.
(('fsign', a), ('fsub', ('b2f', ('!flt', 0.0, a)), ('b2f', ('!flt', a, 0.0))), 'options->lower_fsign'),
(('fsign', 'a@64'), ('fsub', ('b2f', ('!flt', 0.0, a)), ('b2f', ('!flt', a, 0.0))), 'options->lower_doubles_options & nir_lower_dsign'),
(('fsign', a), ('fsub', ('b2f', ('flt(preserve_nan_inf)', 0.0, a)), ('b2f', ('flt(preserve_nan_inf)', a, 0.0))), 'options->lower_fsign'),
(('fsign', 'a@64'), ('fsub', ('b2f', ('flt(preserve_nan_inf)', 0.0, a)), ('b2f', ('flt(preserve_nan_inf)', a, 0.0))), 'options->lower_doubles_options & nir_lower_dsign'),
# Address/offset calculations:
# Drivers supporting imul24 should use a pass like nir_lower_amul(), this
@ -3472,7 +3467,7 @@ optimizations.extend([
"""
optimizations.extend([
(('fquantize2f16', 'a@32'),
('bcsel', ('!flt', ('!fabs', a), math.ldexp(1.0, -14)),
('bcsel', ('flt(preserve_nan_inf)', ('fabs(preserve_nan_inf)', a), math.ldexp(1.0, -14)),
('iand', a, 1 << 31),
('!f2f32', ('!f2f16_rtne', a))),
'options->lower_fquantize2f16'),
@ -3659,12 +3654,12 @@ late_optimizations = [
# This is how SpvOpFOrdNotEqual might be implemented. Replace it with
# SpvOpLessOrGreater.
*add_fabs_fneg((('iand', ('fneu', 'ma', 'mb'), ('iand', ('feq', a, a), ('feq', b, b))), ('ior', ('!flt', 'ma', 'mb'), ('!flt', 'mb', 'ma'))), {'ma' : a, 'mb' : b}),
(('iand', ('fneu', a, 0.0), ('feq', a, a)), ('!flt', 0.0, ('fabs', a))),
*add_fabs_fneg((('iand', ('fneu', 'ma', 'mb'), ('iand', ('feq', a, a), ('feq', b, b))), ('ior', ('flt', 'ma', 'mb'), ('flt', 'mb', 'ma'))), {'ma' : a, 'mb' : b}),
(('iand', ('fneu', a, 0.0), ('feq', a, a)), ('flt', 0.0, ('fabs', a))),
# This is how SpvOpFUnordEqual might be implemented. Replace it with
# !SpvOpLessOrGreater.
*add_fabs_fneg((('ior', ('feq', 'ma', 'mb'), ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('ior', ('!flt', 'ma', 'mb'), ('!flt', 'mb', 'ma')))), {'ma' : a, 'mb' : b}),
*add_fabs_fneg((('ior', ('feq', 'ma', 'mb'), ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('ior', ('flt', 'ma', 'mb'), ('flt', 'mb', 'ma')))), {'ma' : a, 'mb' : b}),
(('ior', ('feq', a, 0.0), ('fneu', a, a)), ('inot', ('!flt', 0.0, ('fabs', a)))),
*add_fabs_fneg((('ior', ('flt', 'ma', 'mb'), ('ior', ('fneu', a, a), ('fneu', b, b))), ('inot', ('fge', 'ma', 'mb'))), {'ma' : a, 'mb' : b}, False),