From aeea2e7c1f0ac93adb530485e19b6b84409a30c4 Mon Sep 17 00:00:00 2001
From: Karol Herbst
Date: Mon, 4 May 2026 20:56:23 +0200
Subject: [PATCH] nir: add fmad_or_ffma helpers and use them in lower_double_ops

We skip emitting ffma_weak here, because otherwise we'd require a lowering
loop with opt_algebraic and lower_double_ops; this way it's also cheaper.

Reviewed-by: Georg Lehmann
Part-of:
---
 src/compiler/nir/nir_lower_double_ops.c | 36 ++++++++++++++++++-------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 1670f564ec6..f204476f289 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -29,6 +29,22 @@
 #include
 #include
 
+static nir_def *
+nir_fmad_or_ffma(nir_builder *build, nir_def *src0, nir_def *src1,
+                 nir_def *src2)
+{
+   if (nir_prefers_fmad(build->shader, src0->bit_size))
+      return nir_fadd(build, nir_fmul(build, src0, src1), src2);
+   else
+      return nir_ffma(build, src0, src1, src2);
+}
+
+static nir_def *
+nir_fmad_or_fma_imm2(nir_builder *build, nir_def *src0, nir_def *src1, double src2)
+{
+   return nir_fmad_or_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
+}
+
 /*
  * Lowers some unsupported double operations, using only:
  *
@@ -172,8 +188,8 @@ lower_rcp(nir_builder *b, nir_def *src)
     * See https://en.wikipedia.org/wiki/Division_algorithm for more details.
     */
 
-   ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
-   ra = nir_ffma_old(b, nir_fneg(b, ra), nir_ffma_imm2(b, ra, src, -1), ra);
+   ra = nir_fmad_or_ffma(b, nir_fneg(b, ra), nir_fmad_or_fma_imm2(b, ra, src, -1), ra);
+   ra = nir_fmad_or_ffma(b, nir_fneg(b, ra), nir_fmad_or_fma_imm2(b, ra, src, -1), ra);
 
    return fix_inv_result(b, ra, src, new_exp);
 }
@@ -299,18 +315,18 @@ lower_sqrt_rsq(nir_builder *b, nir_def *src, bool sqrt)
    nir_def *one_half = nir_imm_double(b, 0.5);
    nir_def *h_0 = nir_fmul(b, one_half, ra);
    nir_def *g_0 = nir_fmul(b, src, ra);
-   nir_def *r_0 = nir_ffma_old(b, nir_fneg(b, h_0), g_0, one_half);
-   nir_def *h_1 = nir_ffma_old(b, h_0, r_0, h_0);
+   nir_def *r_0 = nir_fmad_or_ffma(b, nir_fneg(b, h_0), g_0, one_half);
+   nir_def *h_1 = nir_fmad_or_ffma(b, h_0, r_0, h_0);
    nir_def *res;
    if (sqrt) {
-      nir_def *g_1 = nir_ffma_old(b, g_0, r_0, g_0);
-      nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, g_1), g_1, src);
-      res = nir_ffma_old(b, h_1, r_1, g_1);
+      nir_def *g_1 = nir_fmad_or_ffma(b, g_0, r_0, g_0);
+      nir_def *r_1 = nir_fmad_or_ffma(b, nir_fneg(b, g_1), g_1, src);
+      res = nir_fmad_or_ffma(b, h_1, r_1, g_1);
    } else {
       nir_def *y_1 = nir_fmul_imm(b, h_1, 2.0);
-      nir_def *r_1 = nir_ffma_old(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
-                                  one_half);
-      res = nir_ffma_old(b, y_1, r_1, y_1);
+      nir_def *r_1 = nir_fmad_or_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
+                                      one_half);
+      res = nir_fmad_or_ffma(b, y_1, r_1, y_1);
    }
 
    if (sqrt) {