nir: add fmad_or_ffma helpers and use them in lower_double_ops

We skip emitting ffma_weak here because otherwise we'd require a lowering loop over opt_algebraic and lower_double_ops; emitting fmul+fadd or ffma directly is also cheaper.

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
2026-05-23 21:38:18 +02:00 · 2026-05-04 20:56:23 +02:00 · 2026-05-04 20:56:23 +02:00 · aeea2e7c1f
commit aeea2e7c1f
parent eb817b1f63
1 changed files with 26 additions and 10 deletions
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -29,6 +29,22 @@
 #include <float.h>
 #include <math.h>

+static nir_def *
+nir_fmad_or_ffma(nir_builder *build, nir_def *src0, nir_def *src1,
+                 nir_def *src2)
+{
+   if (nir_prefers_fmad(build->shader, src0->bit_size))
+      return nir_fadd(build, nir_fmul(build, src0, src1), src2);
+   else
+      return nir_ffma(build, src0, src1, src2);
+}
+
+static nir_def *
+nir_fmad_or_fma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
+{
+   return nir_fmad_or_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
+}
+
 /*
 * Lowers some unsupported double operations, using only:
 *
@@ -172,8 +188,8 @@ lower_rcp(nir_builder *b, nir_def *src)
    * See https://en.wikipedia.org/wiki/Division_algorithm for more details.
    */

-   ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
-   ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
+   ra = nir_fmad_or_ffma(b, nir_fneg(b, ra), nir_fmad_or_fma_imm2(b, ra, src, -1), ra);
+   ra = nir_fmad_or_ffma(b, nir_fneg(b, ra), nir_fmad_or_fma_imm2(b, ra, src, -1), ra);

   return fix_inv_result(b, ra, src, new_exp);
 }
@@ -299,18 +315,18 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
   nir_def *one_half = nir_imm_double(b, 0.5);
   nir_def *h_0 = nir_fmul(b, one_half, ra);
   nir_def *g_0 = nir_fmul(b, src, ra);
-   nir_def *r_0 = nir_ffma_old(b, nir_fneg(b, h_0), g_0, one_half);
-   nir_def *h_1 = nir_ffma_old(b, h_0, r_0, h_0);
+   nir_def *r_0 = nir_fmad_or_ffma(b, nir_fneg(b, h_0), g_0, one_half);
+   nir_def *h_1 = nir_fmad_or_ffma(b, h_0, r_0, h_0);
   nir_def *res;
   if (sqrt) {
-      nir_def *g_1 = nir_ffma_old(b, g_0, r_0, g_0);
-      nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, g_1), g_1, src);
-      res = nir_ffma_old(b, h_1, r_1, g_1);
+      nir_def *g_1 = nir_fmad_or_ffma(b, g_0, r_0, g_0);
+      nir_def *r_1 = nir_fmad_or_ffma(b, nir_fneg(b, g_1), g_1, src);
+      res = nir_fmad_or_ffma(b, h_1, r_1, g_1);
   } else {
      nir_def *y_1 = nir_fmul_imm(b, h_1, 2.0);
-      nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
-                              one_half);
-      res = nir_ffma_old(b, y_1, r_1, y_1);
+      nir_def *r_1 = nir_fmad_or_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
+                                         one_half);
+      res = nir_fmad_or_ffma(b, y_1, r_1, y_1);
   }

   if (sqrt) {