From dd3862c1b6c2c85c9ae6e0b3df9a75b49c7a98c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Mon, 8 Jul 2024 07:56:20 -0400
Subject: [PATCH] nir/opt_algebraic: use fmulz for fpow lowering to fix
 incorrect rendering

The original fpow implementation in all radeon drivers used fmulz for the
multiply, so the NIR lowering of fpow should do the same.

Fixes: 9bc1fb4c07fa3e - ac/llvm,radeonsi: lower nir_fpow for aco and llvm
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11464

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30069>
---
 .pick_status.json                     | 2 +-
 src/compiler/nir/nir_opt_algebraic.py | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index 7fa0bef3f2a..7121ad96980 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -10314,7 +10314,7 @@
         "description": "nir/opt_algebraic: use fmulz for fpow lowering to fix incorrect rendering",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "9bc1fb4c07fa3ea50a71c434b5f8161bc0954bb4",
         "notes": null
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index cf94f60185a..ec148eca16f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -292,6 +292,9 @@ optimizations = [
    (('bcsel', ignore_exact('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, b))),
     ('fexp2', ('fmulz', a, b)),
     has_fmulz+' && !'+signed_zero_inf_nan_preserve_32),
+   (('bcsel', ignore_exact('feq', b, 0.0), 1.0, ('fexp2', ('fmulz@32', a, b))),
+    ('fexp2', ('fmulz', a, b)),
+    '!'+signed_zero_inf_nan_preserve_32),
 ]
 
 # Shorthand for the expansion of just the dot product part of the [iu]dp4a
@@ -1460,6 +1463,8 @@ optimizations.extend([
    # Exponential/logarithmic identities
    (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
    (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
+   # 32-bit fpow should be lowered with fmulz to fix https://gitlab.freedesktop.org/mesa/mesa/-/issues/11464 (the issue includes an apitrace)
+   (('fpow@32', a, b), ('fexp2', ('fmulz', ('flog2', a), b)), 'options->lower_fpow && ' + has_fmulz), # a^b = 2^(lg2(a)*b)
    (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
    (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
    (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),