vtn/opencl: map mad to ffma_weak and fma to ffma

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41165>
This commit is contained in:
Karol Herbst 2026-04-20 04:14:22 +02:00 committed by Marge Bot
parent 09d50cc7a3
commit a140781aec

View file

@ -639,24 +639,11 @@ handle_special(struct vtn_builder *b, uint32_t opcode,
*
* Implemented either as a correctly rounded fma or as a multiply
* followed by an add both of which are correctly rounded
*
* So lower to fmul+fadd if we have to, but fuse to an ffma if the backend
* supports that. This can be significantly faster.
*/
bool lower =
((nb->shader->options->lower_ffma16 && srcs[0]->bit_size == 16) ||
(nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32) ||
(nb->shader->options->lower_ffma64 && srcs[0]->bit_size == 64));
const unsigned save_math_ctrl = nb->fp_math_ctrl;
nir_def *res;
nb->fp_math_ctrl |= nir_fp_exact;
if (lower)
res = nir_fmad_old(nb, srcs[0], srcs[1], srcs[2]);
else
res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]);
nb->fp_math_ctrl |= nir_fp_no_contract | nir_fp_no_transform;
nir_def *res = nir_ffma_weak(nb, srcs[0], srcs[1], srcs[2]);
nb->fp_math_ctrl = save_math_ctrl;
return res;
}
@ -696,14 +683,13 @@ handle_special(struct vtn_builder *b, uint32_t opcode,
return nir_ldexp(nb, srcs[0], srcs[1]);
case OpenCLstd_Fma: {
/* FIXME: the software implementation only supports fp32 for now. */
if ((nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32) ||
(nb->shader->options->lower_ffma16 && srcs[0]->bit_size == 16))
if (srcs[0]->bit_size != 64 && !nir_has_ffma(nb->shader, srcs[0]->bit_size))
break;
/* OpenCL FMA is not allowed to be split. */
const bool save_math_ctrl = nb->fp_math_ctrl;
const unsigned save_math_ctrl = nb->fp_math_ctrl;
nb->fp_math_ctrl |= nir_fp_exact;
nir_def *res = nir_ffma_old(nb, srcs[0], srcs[1], srcs[2]);
nir_def *res = nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
nb->fp_math_ctrl = save_math_ctrl;
return res;
}