nir/opt_algebraic: use fcanonicalize

Mostly optimizations. There are also some minor fixes, but I don't
think those are worth backporting.

Foz-DB Navi21:
Totals from 7570 (9.21% of 82151) affected shaders:
MaxWaves: 204288 -> 204476 (+0.09%); split: +0.09%, -0.00%
Instrs: 4511439 -> 4500261 (-0.25%); split: -0.25%, +0.00%
CodeSize: 23727088 -> 23644388 (-0.35%); split: -0.35%, +0.00%
VGPRs: 290944 -> 290616 (-0.11%); split: -0.12%, +0.01%
SpillSGPRs: 1256 -> 1251 (-0.40%)
Latency: 16738072 -> 16726717 (-0.07%); split: -0.10%, +0.04%
InvThroughput: 3736856 -> 3716631 (-0.54%); split: -0.55%, +0.01%
VClause: 66150 -> 66156 (+0.01%); split: -0.05%, +0.06%
SClause: 93644 -> 93631 (-0.01%); split: -0.02%, +0.01%
Copies: 448816 -> 458584 (+2.18%); split: -0.05%, +2.22%
Branches: 139817 -> 139775 (-0.03%); split: -0.03%, +0.00%
PreSGPRs: 321922 -> 321900 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 239709 -> 238856 (-0.36%); split: -0.39%, +0.03%
VALU: 2595164 -> 2584250 (-0.42%); split: -0.43%, +0.01%
SALU: 839038 -> 838965 (-0.01%); split: -0.02%, +0.01%
VMEM: 137584 -> 137583 (-0.00%)

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39180>
This commit is contained in:
Georg Lehmann 2026-01-06 17:42:13 +01:00 committed by Marge Bot
parent 625afb0d29
commit 442daeb54a

View file

@@ -215,20 +215,14 @@ optimizations = [
(('~fmul', ('fsign', a), ('ffloor', ('fadd', ('fabs', a), 0.5))), ('ftrunc', ('fadd', a, ('fmul', ('fsign', a), 0.5))), '!options->lower_ftrunc || options->lower_ffloor'),
(('~fneg', ('fneg', a)), a),
(('fneg', ('fneg', a)), ('fcanonicalize', a)),
(('ineg', ('ineg', a)), a),
(('fabs', ('fneg', a)), ('fabs', a)),
(('fabs', ('u2f', a)), ('u2f', a)),
(('iabs', ('iabs', a)), ('iabs', a)),
(('iabs', ('ineg', a)), ('iabs', a)),
(('~fadd', a, 0.0), a),
(('~fadd', a, -0.0), a, 'true', TestStatus.UNSUPPORTED), # No support for inexactly testing -0.0 inputs
# a+0.0 is 'a' unless 'a' is denormal or -0.0. If it's only used by a
# floating point instruction, they should flush any input denormals and we
# can replace -0.0 with 0.0 if the float execution mode allows it.
(('fadd(is_only_used_as_float,nsz)', 'a', 0.0), a),
(('fadd(is_only_used_as_float)', a, -0.0), a),
(('fadd', ('fneg', a), -0.0), ('fneg', a)),
(('fadd(nsz)', a, 0.0), ('fcanonicalize', a)),
(('fadd', a, -0.0), ('fcanonicalize', a)),
(('iadd', a, 0), a),
(('iadd_sat', a, 0), a),
(('isub_sat', a, 0), a),
@@ -268,8 +262,8 @@ optimizations = [
(('iadd', ('ineg', a), a), 0),
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
(('~fadd', ('fneg', a), ('fadd', a, b)), b),
(('~fadd', a, ('fadd', ('fneg', a), b)), b),
(('~fadd', ('fneg', a), ('fadd', a, b)), ('fcanonicalize', b)),
(('~fadd', a, ('fadd', ('fneg', a), b)), ('fcanonicalize', b)),
(('fadd', ('fsat', a), ('fsat', ('fneg', a))), ('fsat', ('fabs', a))),
(('fadd', a, a), ('fmul', a, 2.0)),
(('fadd(contract)', a, ('fadd(is_used_once)', a, b)), ('fadd', b, ('fmul', a, 2.0))),
@@ -289,12 +283,7 @@ optimizations = [
(('imul', a, 0), 0),
(('umul_unorm_4x8_vc4', a, 0), 0),
(('umul_unorm_4x8_vc4', a, ~0), a),
(('~fmul', a, 1.0), a),
(('~fmulz', a, 1.0), a),
# The only effect a*1.0 can have is flushing denormals. If it's only used by
# a floating point instruction, they should flush any input denormals and
# this multiplication isn't needed.
(('fmul(is_only_used_as_float)', a, 1.0), a),
(('fmul', a, 1.0), ('fcanonicalize', a)),
(('imul', a, 1), a),
(('fmul', a, -1.0), ('fneg', a)),
(('imul', a, -1), ('ineg', a)),
@@ -304,13 +293,10 @@ optimizations = [
# If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN
(('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
(('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
(('~ffma', 0.0, a, b), b),
(('~ffma', -0.0, a, b), b, 'true', TestStatus.UNSUPPORTED), # No support for inexactly testing -0.0 inputs
(('ffma(is_only_used_as_float,nsz,nnan,ninf)', 0.0, a, b), b),
(('ffma(is_only_used_as_float,nsz,nnan,ninf)', -0.0, a, b), b, 'true', TestStatus.UNSUPPORTED), # No support for nsz testing -0.0 inputs
(('ffma(nsz,nnan)', 0.0, a, b), ('fcanonicalize', b)),
(('ffma(nsz,nnan)', -0.0, a, b), ('fcanonicalize', b), 'true', TestStatus.UNSUPPORTED), # No support for nsz testing -0.0 inputs
(('ffmaz', 0.0, a, b), ('fadd', 0.0, b)),
(('ffmaz', -0.0, a, b), ('fadd', 0.0, b)),
(('~ffma', a, b, 0.0), ('fmul', a, b)),
(('ffma(nsz)', a, b, 0.0), ('fmul', a, b)),
(('ffmaz(nsz)', a, b, 0.0), ('fmulz', a, b)),
(('ffma', a, b, -0.0), ('fmul', a, b)),
@@ -321,12 +307,12 @@ optimizations = [
(('ffmaz(nsz)', -1.0, a, b), ('fadd', ('fneg', a), b)),
(('~ffma', '#a', '#b', c), ('fadd', ('fmul', a, b), c)),
(('~ffmaz', '#a', '#b', c), ('fadd', ('fmulz', a, b), c)),
(('~flrp', a, b, 0.0), a),
(('~flrp', a, b, -0.0), a, 'true', TestStatus.UNSUPPORTED), # No support for inexactly testing -0.0 inputs
(('~flrp', a, b, 1.0), b),
(('~flrp', a, a, b), a),
(('~flrp', 0.0, a, b), ('fmul', a, b)),
(('~flrp', -0.0, a, b), ('fmul', a, b), 'true', TestStatus.UNSUPPORTED), # all inputs skipped
(('flrp(nnan,nsz)', a, b, 0.0), ('fcanonicalize', a)),
(('flrp(nnan,nsz)', a, b, -0.0), ('fcanonicalize', a), 'true', TestStatus.UNSUPPORTED), # No support for nsz testing -0.0 inputs
(('flrp(nnan,nsz)', a, b, 1.0), ('fcanonicalize', b)),
(('~flrp', a, a, b), ('fcanonicalize', a)),
(('flrp(nnan,nsz)', 0.0, a, b), ('fmul', a, b)),
(('flrp(nnan,nsz)', -0.0, a, b), ('fmul', a, b), 'true', TestStatus.UNSUPPORTED), # No support for nsz testing -0.0 inputs
# flrp(a, a + b, c) => a + flrp(0, b, c) => a + (b * c)
(('~flrp', a, ('fadd(is_used_once)', a, b), c), ('fadd', ('fmul', b, c), a)),
@@ -465,7 +451,7 @@ optimizations.extend([
# Float sizes
for s in [16, 32, 64]:
optimizations.extend([
(('~flrp@{}'.format(s), a, b, ('b2f', 'c@1')), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
(('~flrp@{}'.format(s), a, b, ('b2f', 'c@1')), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a)), 'options->lower_flrp{}'.format(s)),
(('~flrp@{}'.format(s), a, ('fadd', a, b), c), ('fadd', ('fmul', b, c), a), 'options->lower_flrp{}'.format(s)),
(('~flrp@{}'.format(s), ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a), 'options->lower_flrp{}'.format(s)),
@@ -478,14 +464,14 @@ for s in [16, 32, 64]:
(('~fadd@{}'.format(s), a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp{}'.format(s)),
(('~fadd@{}'.format(s), ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
(('~fadd@{}'.format(s), a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
(('~fadd@{}'.format(s), a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a)), 'options->lower_flrp{}'.format(s)),
(('~ffma@{}'.format(s), a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1'))), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, b, a)),
(('~ffma@{}'.format(s), b, ('b2f', 'c@1'), ('ffma', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, b, a)),
(('~ffma@{}'.format(s), a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1'))), ('fmul', b, ('b2f', 'c@1'))), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma@{}'.format(s), b, ('b2f', 'c@1'), ('ffma', ('fneg', a), ('b2f', 'c@1'), a)), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
# These two aren't flrp lowerings, but do appear in some shaders.
(('~ffma@{}'.format(s), ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, b, a)),
(('~ffma@{}'.format(s), ('b2f', 'c@1'), ('ffma', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, d, ('fmul', a, b))),
(('~ffma@{}'.format(s), ('b2f', 'c@1'), ('fadd', b, ('fneg', a)), a), ('bcsel', c, ('fcanonicalize', b), ('fcanonicalize', a))),
(('~ffma@{}'.format(s), ('b2f', 'c@1'), ('ffma', ('fneg', a), b, d), ('fmul', a, b)), ('bcsel', c, ('fcanonicalize', d), ('fmul', a, b))),
# 1 - ((1 - a) * (1 - b))
# 1 - (1 - a - b + a*b)
@@ -952,12 +938,8 @@ optimizations.extend([
(('bcsel', a, b, a), ('iand', a, b)),
(('bcsel', a, b, True), ('ior', ('inot', a), b)),
(('bcsel', a, False, b), ('iand', ('inot', a), b)),
(('fmin', 'a@64', a), a, '!nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 64)'),
(('fmin', 'a@32', a), a, '!nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 32)'),
(('fmin', 'a@16', a), a, '!nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
(('fmax', 'a@64', a), a, '!nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 64)'),
(('fmax', 'a@32', a), a, '!nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 32)'),
(('fmax', 'a@16', a), a, '!nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
(('fmin', a, a), ('fcanonicalize', a)),
(('fmax', a, a), ('fcanonicalize', a)),
(('imin', a, a), a),
(('imax', a, a), a),
(('umin', a, a), a),
@@ -984,10 +966,10 @@ optimizations.extend([
(('umax', ('umin', a, b), a), a),
(('imin', ('imax', a, b), a), a),
(('imax', ('imin', a, b), a), a),
(('fmax(nsz)', 'a(is_a_number_not_negative)', 'b(is_not_positive)'), ('fmul', a, 1.0)),
(('fmin(nsz)', 'a(is_a_number_not_positive)', 'b(is_not_negative)'), ('fmul', a, 1.0)),
(('fmax', 'a(is_a_number_not_negative)', 'b(is_lt_zero)'), ('fmul', a, 1.0)),
(('fmin', 'a(is_a_number_not_positive)', 'b(is_gt_zero)'), ('fmul', a, 1.0)),
(('fmax(nsz)', 'a(is_a_number_not_negative)', 'b(is_not_positive)'), ('fcanonicalize', a)),
(('fmin(nsz)', 'a(is_a_number_not_positive)', 'b(is_not_negative)'), ('fcanonicalize', a)),
(('fmax', 'a(is_a_number_not_negative)', 'b(is_lt_zero)'), ('fcanonicalize', a)),
(('fmin', 'a(is_a_number_not_positive)', 'b(is_gt_zero)'), ('fcanonicalize', a)),
(('fneg', ('fmax(is_used_once)', ('fneg', a), ('fneg', b))), ('fmin', a, b)),
(('fneg', ('fmin(is_used_once)', ('fneg', a), ('fneg', b))), ('fmax', a, b)),
(('fneg', ('fmax(is_used_once)', ('fneg', a), '#b')), ('fmin', a, ('fneg', b))),
@@ -1044,9 +1026,9 @@ optimizations.extend([
(('imin', a, ('ineg', a)), ('ineg', ('iabs', a))),
(('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))),
(('imin', a, ('ineg', ('iabs', a))), ('ineg', ('iabs', a))),
(('~fmin', a, ('fabs', a)), a),
(('fmin', a, ('fabs', a)), ('fcanonicalize', a)),
(('imin', a, ('iabs', a)), a),
(('~fmax', a, ('fneg', ('fabs', a))), a),
(('fmax', a, ('fneg', ('fabs', a))), ('fcanonicalize', a)),
(('imax', a, ('ineg', ('iabs', a))), a),
(('fmax', a, ('fabs', a)), ('fabs', a)),
(('imax', a, ('iabs', a)), ('iabs', a)),
@@ -1265,7 +1247,7 @@ for s in [16, 32, 64]:
optimizations.extend([
# S = smaller, B = bigger
# floatS -> floatB -> floatS ==> identity
(('~f2f{}'.format(s), ('f2f{}'.format(B), 'a@{}'.format(s))), a),
(('f2f{}'.format(s), ('f2f{}'.format(B), 'a@{}'.format(s))), ('fcanonicalize', a)),
# floatS -> floatB -> intB ==> floatS -> intB
(('f2u{}'.format(B), ('f2f{}'.format(B), 'a@{}'.format(s))), ('f2u{}'.format(B), a)),
@@ -1842,8 +1824,8 @@ optimizations.extend([
('bcsel', ('ieq', ('iand', b, 1), 0), 0, ('u2f', a))),
# Exponential/logarithmic identities
(('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
(('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
(('~fexp2', ('flog2', a)), ('fcanonicalize', a)), # 2^lg2(a) = a
(('~flog2', ('fexp2', a)), ('fcanonicalize', a)), # lg2(2^a) = a
# 32-bit fpow should use fmulz to fix https://gitlab.freedesktop.org/mesa/mesa/-/issues/11464 (includes apitrace)
(('fpow@32', a, b), ('fexp2', ('fmulz', ('flog2', a), b)), 'options->lower_fpow && ' + has_fmulz), # a^b = 2^(lg2(a)*b)
(('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
@@ -1857,12 +1839,12 @@ optimizations.extend([
(('~fexp2', ('fmul', ('flog2', a), 5.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), a)),
(('~fexp2', ('fmul', ('flog2', a), 6.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), ('fmul', a, a))),
(('~fexp2', ('fmul', ('flog2', a), 8.0)), ('fmul', ('fmul', ('fmul', a, a), ('fmul', a, a)), ('fmul', ('fmul', a, a), ('fmul', a, a)))),
(('~fpow', a, 1.0), a),
(('~fpow', a, 1.0), ('fcanonicalize', a)),
(('~fpow', a, 2.0), ('fmul', a, a)),
(('~fpow', a, 3.0), ('fmul', ('fmul', a, a), a)),
(('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
(('~fpow', 2.0, a), ('fexp2', a)),
(('~fpow', ('fpow', a, 2.2), 0.454545), a),
(('~fpow', ('fpow', a, 2.2), 0.454545), ('fcanonicalize', a)),
(('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
(('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
(('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
@@ -1878,7 +1860,7 @@ optimizations.extend([
# Division and reciprocal
(('~fdiv', 1.0, a), ('frcp', a)),
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
(('~frcp', ('frcp', a)), a),
(('~frcp', ('frcp', a)), ('fcanonicalize', a)),
(('~frcp', ('fsqrt', a)), ('frsq', a)),
(('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
(('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
@@ -1909,7 +1891,7 @@ optimizations.extend([
(('bcsel@64', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a))), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
(('bcsel', a, b, b), b),
(('~fcsel', a, b, b), b),
(('fcsel', a, b, b), ('fcanonicalize', b)),
# With D3D booleans, imax is AND and umax is OR
(('imax', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
@@ -1995,7 +1977,7 @@ optimizations.extend([
# fract(x) = x - floor(x), so fract(NaN) = NaN
(('~ffract', 'a(is_integral)'), 0.0),
(('ffract', ('ffract', a)), ('ffract', a)),
(('fabs', 'a(is_not_negative)'), a),
(('fabs', 'a(is_not_negative)'), ('fcanonicalize', a)),
(('iabs', 'a(is_not_negative)'), a),
(('fsat', 'a(is_not_positive)'), 0.0),
@@ -2458,10 +2440,10 @@ optimizations.extend([
(('ifind_msb', ('extract_u16', a, b)), ('ufind_msb', ('extract_u16', a, b))),
(('ifind_msb', ('imax', a, 1)), ('ufind_msb', ('imax', a, 1))),
(('~fmul', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), b)),
(('~fmul', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, b, ('fneg', b))),
(('~fmulz', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), b)),
(('~fmulz', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, b, ('fneg', b))),
(('fmul', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), ('fcanonicalize', b))),
(('fmul', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, ('fcanonicalize', b), ('fneg', b))),
(('fmulz(nsz)', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', b), ('fcanonicalize', b))),
(('fmulz(nsz)', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, ('fcanonicalize', b), ('fneg', b))),
(('fabs', ('bcsel(is_used_once)', b, ('fneg', a), a)), ('fabs', a)),
(('fabs', ('bcsel(is_used_once)', b, a, ('fneg', a))), ('fabs', a)),
(('~bcsel', ('flt', a, 0.0), ('fneg', a), a), ('fabs', a)),
@@ -3614,8 +3596,8 @@ before_ffma_optimizations = [
(('iadd', ('ineg', a), a), 0),
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
(('~fadd', ('fneg', a), ('fadd', a, b)), b),
(('~fadd', a, ('fadd', ('fneg', a), b)), b),
(('~fadd', ('fneg', a), ('fadd', a, b)), ('fcanonicalize', b)),
(('~fadd', a, ('fadd', ('fneg', a), b)), ('fcanonicalize', b)),
(('~flrp', ('fadd(is_used_once)', a, -1.0), ('fadd(is_used_once)', a, 1.0), d), ('fadd', ('flrp', -1.0, 1.0, d), a)),
(('~flrp', ('fadd(is_used_once)', a, 1.0), ('fadd(is_used_once)', a, -1.0), d), ('fadd', ('flrp', 1.0, -1.0, d), a)),
@@ -3713,9 +3695,7 @@ late_optimizations = [
(('inot', ('fge(is_used_once)', a, b)), ('fltu', a, b), 'options->has_fneo_fcmpu'),
(('inot', ('fltu(is_used_once)', a, b)), ('fge', a, b)),
# nir_lower_to_source_mods will collapse this, but its existence during the
# optimization loop can prevent other optimizations.
(('fneg', ('fneg', a)), a),
(('fneg(is_only_used_as_float)', ('fneg', a)), a),
# combine imul and iadd to imad
(('iadd@32', ('imul(is_only_used_by_iadd)', a, b), c), ('imad', a, b, c), 'options->has_imad32'),
@@ -4174,7 +4154,7 @@ distribute_src_mods = [
(('fdot2_replicated', ('fneg', a), ('fneg', b)), ('fdot2_replicated', a, b), 'true', TestStatus.UNSUPPORTED),
(('fdot3_replicated', ('fneg', a), ('fneg', b)), ('fdot3_replicated', a, b), 'true', TestStatus.UNSUPPORTED),
(('fdot4_replicated', ('fneg', a), ('fneg', b)), ('fdot4_replicated', a, b), 'true', TestStatus.UNSUPPORTED),
(('fneg', ('fneg', a)), a),
(('fneg(is_only_used_as_float)', ('fneg', a)), a),
(('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
(('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),