From a140781aece24f8f314e1fd4094d5ed5d996f19b Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 20 Apr 2026 04:14:22 +0200 Subject: [PATCH] vtn/opencl: map mad to ffma_weak and fma to ffma Reviewed-by: Georg Lehmann Part-of: --- src/compiler/spirv/vtn_opencl.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c index db754eff91b..996861c58f3 100644 --- a/src/compiler/spirv/vtn_opencl.c +++ b/src/compiler/spirv/vtn_opencl.c @@ -639,24 +639,11 @@ handle_special(struct vtn_builder *b, uint32_t opcode, * * Implemented either as a correctly rounded fma or as a multiply * followed by an add both of which are correctly rounded - * - * So lower to fmul+fadd if we have to, but fuse to an ffma if the backend - * supports that. This can be significantly faster. */ - bool lower = - ((nb->shader->options->lower_ffma16 && srcs[0]->bit_size == 16) || - (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32) || - (nb->shader->options->lower_ffma64 && srcs[0]->bit_size == 64)); const unsigned save_math_ctrl = nb->fp_math_ctrl; - nir_def *res; - - nb->fp_math_ctrl |= nir_fp_exact; - if (lower) - res = nir_fmad_old(nb, srcs[0], srcs[1], srcs[2]); - else - res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]); - + nb->fp_math_ctrl |= nir_fp_no_contract | nir_fp_no_transform; + nir_def *res = nir_ffma_weak(nb, srcs[0], srcs[1], srcs[2]); nb->fp_math_ctrl = save_math_ctrl; return res; } @@ -696,14 +683,13 @@ handle_special(struct vtn_builder *b, uint32_t opcode, return nir_ldexp(nb, srcs[0], srcs[1]); case OpenCLstd_Fma: { /* FIXME: the software implementation only supports fp32 for now. */ - if ((nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32) || - (nb->shader->options->lower_ffma16 && srcs[0]->bit_size == 16)) + if (srcs[0]->bit_size != 64 && !nir_has_ffma(nb->shader, srcs[0]->bit_size)) break; /* OpenCL FMA is not allowed to be split. */ - const bool save_math_ctrl = nb->fp_math_ctrl; + const unsigned save_math_ctrl = nb->fp_math_ctrl; nb->fp_math_ctrl |= nir_fp_exact; - nir_def *res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]); + nir_def *res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]); nb->fp_math_ctrl = save_math_ctrl; return res; }