From aeea2e7c1f0ac93adb530485e19b6b84409a30c4 Mon Sep 17 00:00:00 2001
From: Karol Herbst <kherbst@redhat.com>
Date: Mon, 4 May 2026 20:56:23 +0200
Subject: [PATCH] nir: add fmad_or_ffma helpers and use them in lower_double_ops

We skip emitting ffma_weak here because otherwise we'd require a lowering
loop with opt_algebraic and lower_double_ops. Skipping it is also
cheaper.

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
---
 src/compiler/nir/nir_lower_double_ops.c | 36 ++++++++++++++++++-------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 1670f564ec6..f204476f289 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -29,6 +29,22 @@
 #include <float.h>
 #include <math.h>
 
+/* Emits src0 * src1 + src2: fmul + fadd if nir_prefers_fmad(), else ffma. */
+static nir_def *
+nir_fmad_or_ffma(nir_builder *build, nir_def *src0, nir_def *src1, nir_def *src2)
+{
+   if (!nir_prefers_fmad(build->shader, src0->bit_size))
+      return nir_ffma(build, src0, src1, src2);
+   return nir_fadd(build, nir_fmul(build, src0, src1), src2);
+}
+
+/* Same, with src2 given as a double emitted as an immediate of src0's bit size. */
+static nir_def *
+nir_fmad_or_fma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
+{
+   return nir_fmad_or_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
+}
+
 /*
  * Lowers some unsupported double operations, using only:
  *
@@ -172,8 +188,8 @@ lower_rcp(nir_builder *b, nir_def *src)
     * See https://en.wikipedia.org/wiki/Division_algorithm for more details.
     */
 
-   ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
-   ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
+   ra = nir_fmad_or_ffma(b, nir_fneg(b, ra), nir_fmad_or_fma_imm2(b, ra, src, -1), ra);
+   ra = nir_fmad_or_ffma(b, nir_fneg(b, ra), nir_fmad_or_fma_imm2(b, ra, src, -1), ra);
 
    return fix_inv_result(b, ra, src, new_exp);
 }
@@ -299,18 +315,18 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
    nir_def *one_half = nir_imm_double(b, 0.5);
    nir_def *h_0 = nir_fmul(b, one_half, ra);
    nir_def *g_0 = nir_fmul(b, src, ra);
-   nir_def *r_0 = nir_ffma_old(b, nir_fneg(b, h_0), g_0, one_half);
-   nir_def *h_1 = nir_ffma_old(b, h_0, r_0, h_0);
+   nir_def *r_0 = nir_fmad_or_ffma(b, nir_fneg(b, h_0), g_0, one_half);
+   nir_def *h_1 = nir_fmad_or_ffma(b, h_0, r_0, h_0);
    nir_def *res;
    if (sqrt) {
-      nir_def *g_1 = nir_ffma_old(b, g_0, r_0, g_0);
-      nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, g_1), g_1, src);
-      res = nir_ffma_old(b, h_1, r_1, g_1);
+      nir_def *g_1 = nir_fmad_or_ffma(b, g_0, r_0, g_0);
+      nir_def *r_1 = nir_fmad_or_ffma(b, nir_fneg(b, g_1), g_1, src);
+      res = nir_fmad_or_ffma(b, h_1, r_1, g_1);
    } else {
       nir_def *y_1 = nir_fmul_imm(b, h_1, 2.0);
-      nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
-                              one_half);
-      res = nir_ffma_old(b, y_1, r_1, y_1);
+      nir_def *r_1 = nir_fmad_or_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
+                                         one_half);
+      res = nir_fmad_or_ffma(b, y_1, r_1, y_1);
    }
 
    if (sqrt) {