nir/opt_algebraic: add more fmulz patterns

Totals from 3 (0.00% of 202440) affected shaders: (Navi48)
Instrs: 5684 -> 5641 (-0.76%); split: -0.77%, +0.02%
CodeSize: 30952 -> 30708 (-0.79%); split: -0.80%, +0.01%
Latency: 9236 -> 9199 (-0.40%); split: -0.42%, +0.02%
InvThroughput: 2287 -> 2273 (-0.61%)
VALU: 3900 -> 3884 (-0.41%)
SALU: 305 -> 289 (-5.25%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40848>
This commit is contained in:
Georg Lehmann 2026-03-31 15:41:34 +02:00 committed by Marge Bot
parent 7372c7c9e2
commit 52b195b4e8

View file

@ -380,6 +380,13 @@ optimizations += [
# (b!=0.0 ? a : 0.0) * (a!=0.0 ? b : 0.0) -> fmulz(a, b)
*add_fabs_fneg((('fmul@32(nsz)', ('bcsel', ('fneu', b, 0.0), 'ma', 0.0), ('bcsel', ('fneu', a, 0.0), 'mb', 0.0)),
('fmulz', 'ma', 'mb'), has_fmulz), {'ma' : a, 'mb' : b}),
# b2f(a != 0.0 && b) * (b ? a : 0.0) -> fmulz(a, b2f(b))
*add_fabs_fneg((('fmul@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0)),
('fmulz', 'ma', ('b2f', b)), has_fmulz), {'ma' : a}),
# b2f(!(a == 0.0 || b)) * (b ? 0.0 : a) -> fmulz(a, b2f(!b))
*add_fabs_fneg((('fmul@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma')),
('fmulz', 'ma', ('b2f', ('inot', b))), has_fmulz), {'ma' : a}),
# (min(abs(a), abs(b)) == 0.0 ? 0.0 : a * b) -> fmulz(a,b)
*add_fabs_fneg((('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul@32', 'ma', 'mb')),
@ -394,6 +401,10 @@ optimizations += [
('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
*add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
# b == 0.0 ? 1.0 : fexp2(fmul(a, b)) -> fexp2(fmulz(a, b))
*add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))),
@ -797,6 +808,10 @@ optimizations.extend([
(('bcsel(is_only_used_as_float_nsz)', ('fneu', a, 0.0), a, 0.0), a),
(('bcsel(is_only_used_as_float_nsz)', ('feq', a, 0.0), -0.0, a), a),
(('bcsel(is_only_used_as_float_nsz)', ('fneu', a, 0.0), a, -0.0), a),
(('bcsel(is_only_used_as_float_nsz)', ('ior', ('feq', a, 0.0), b), 0.0, a), ('bcsel', b, 0, a)),
(('bcsel(is_only_used_as_float_nsz)', ('iand', ('fneu', a, 0.0), b), a, 0.0), ('bcsel', b, a, 0)),
(('bcsel(is_only_used_as_float_nsz)', ('ior', ('feq', a, 0.0), b), -0.0, a), ('bcsel', b, 0, a)),
(('bcsel(is_only_used_as_float_nsz)', ('iand', ('fneu', a, 0.0), b), a, -0.0), ('bcsel', b, a, 0)),
(('bcsel', ('feq', a, 0), 0, ('fsat', ('fmul', a, 'b(is_a_number)'))), ('fsat(preserve_sz)', ('fmul', a, b))),
(('bcsel', ('fneu', a, 0), ('fsat', ('fmul', a, 'b(is_a_number)')), 0), ('fsat(preserve_sz)', ('fmul', a, b))),
(('bcsel', ('feq', a, 0), 0, ('fsat', ('fmul_rtz', a, 'b(is_a_number)'))), ('fsat(preserve_sz)', ('fmul_rtz', a, b))),