From ec18b2d28a92db28373973e6d19844cb06472740 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Mon, 2 Mar 2026 12:57:44 -0800
Subject: [PATCH] ir3: Lower ffma

OpenCL requires that ffma is fused, where fmad can be unfused. Rusticl
will lower CL ffma for us, but that requires us to set .lower_ffmaN.

Late-opts will still re-fuse inexact fmul+fadd, since we are also
setting .fuse_ffmaN.

https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37807#note_3215155
proposes separate ffma and fmad instructions for the two cases, which
would be a cleaner solution.

Signed-off-by: Rob Clark
Part-of:
---
 src/freedreno/ir3/ir3_compiler.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index cc3d7144b75..dac61355395 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -130,6 +130,16 @@ static const nir_shader_compiler_options ir3_base_options = {
    .lower_usub_borrow = true,
    .lower_mul_high = true,
    .lower_mul_2x32_64 = true,
+   /* ir3's mad is an unfused mul-add instruction, so we need to flag fma
+    * lowering so that CL can implement fused fma in software. GLSL,
+    * SPIRV, and NIR don't require either fused or unfused behavior from
+    * fma, and we'll turn mul+adds back into nir_op_ffma (again, implemented
+    * as unfused) during nir_opt_algebraic_late() (assuming it's not
+    * decorated with GLSL's precise, or SPIRV's NoContraction).
+    */
+   .lower_ffma16 = true,
+   .lower_ffma32 = true,
+   .lower_ffma64 = true,
    .fuse_ffma16 = true,
    .fuse_ffma32 = true,
    .fuse_ffma64 = true,