From ecfefe823e033b9c563d280fae125e0a9c14faae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 8 Jul 2024 07:56:20 -0400 Subject: [PATCH] nir/opt_algebraic: use fmulz for fpow lowering to fix incorrect rendering The original implementation in all radeon drivers had this behavior. Fixes: 9bc1fb4c07fa3e - ac/llvm,radeonsi: lower nir_fpow for aco and llvm Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11464 Acked-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Faith Ekstrand Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 2c09a11c210..c721f38dcf0 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -340,6 +340,8 @@ optimizations = [ *add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ignore_exact('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))), ('fexp2', ('fmulz', a, 'mb')), has_fmulz), {'mb': b}), + *add_fabs_fneg((('bcsel', ignore_exact('feq', b, 0.0), 1.0, ('fexp2', ('fmulz', a, 'mb'))), + ('fexp2', ('fmulz', a, 'mb'))), {'mb': b}), ] # Shorthand for the expansion of just the dot product part of the [iu]dp4a @@ -1518,6 +1520,8 @@ optimizations.extend([ # Exponential/logarithmic identities (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a + # 32-bit fpow should use fmulz to fix https://gitlab.freedesktop.org/mesa/mesa/-/issues/11464 (includes apitrace) + (('fpow@32', a, b), ('fexp2', ('fmulz', ('flog2', a), b)), 'options->lower_fpow && ' + has_fmulz), # a^b = 2^(lg2(a)*b) (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b) (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),