From 22dc06798b1d69e2588b755d48a21c63e17d55c2 Mon Sep 17 00:00:00 2001
From: Georg Lehmann <dadschoorse@gmail.com>
Date: Fri, 31 Oct 2025 13:12:34 +0100
Subject: [PATCH] aco/optimizer: never unfuse fma
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This shouldn't change anything in practice, and reducing precision
just because precise isn't set is weird. Without fused_mad_mix,
v_fma_f32 is now never turned into a mad_mix instruction.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38183>
---
 src/amd/compiler/aco_optimizer.cpp        |  5 ++---
 src/amd/compiler/tests/test_optimizer.cpp | 16 ++++++++++++----
 2 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index faa772317fe..73724495e14 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -880,8 +880,7 @@ alu_opt_info_is_valid(opt_ctx& ctx, alu_opt_info& info)
          info.operands[2].neg[0] = true;
          break;
       case aco_opcode::v_fma_f32:
-         // TODO remove precise, not clear why unfusing fma would be valid
-         if (!ctx.program->dev.fused_mad_mix && info.defs[0].isPrecise())
+         if (!ctx.program->dev.fused_mad_mix)
             return false;
          break;
       case aco_opcode::v_mad_f32:
@@ -4370,7 +4369,7 @@ can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    case aco_opcode::v_subrev_f32:
    case aco_opcode::v_mul_f32: return !instr->isSDWA() && !instr->isDPP();
    case aco_opcode::v_fma_f32:
-      return ctx.program->dev.fused_mad_mix || !instr->definitions[0].isPrecise();
+      return ctx.program->dev.fused_mad_mix;
    case aco_opcode::v_fma_mix_f32:
    case aco_opcode::v_fma_mixlo_f16: return true;
    default: return false;
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 04ba4512680..93b95684361 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -1050,9 +1050,11 @@ BEGIN_TEST(optimize.mad_mix.input_conv.basic)
 
       //! v1: %res3 = v_fma_mix_f32 %a, %a, lo(%a16)
       //! p_unit_test 3, %res3
-      writeout(3, fma(a, a, f2f32(a16)));
+      writeout(3, fadd(fmul(a, a), f2f32(a16)));
 
-      //! v1: %res4 = v_fma_mix_f32 %a, %a, lo(%a16)
+      //~gfx9! v1: %tmp4 = v_cvt_f32_f16 %a16
+      //~gfx9! v1: %res4 = v_fma_f32 %a, %a, %tmp4
+      //~gfx10! v1: %res4 = v_fma_mix_f32 %a, %a, lo(%a16)
       //! p_unit_test 4, %res4
       writeout(4, fma(a, a, f2f32(a16)));
 
@@ -1239,7 +1241,9 @@ BEGIN_TEST(optimize.mad_mix.output_conv.basic)
       //! p_unit_test 1, %res1
       writeout(1, f2f16(fadd(a, b)));
 
-      //! v2b: %res2 = v_fma_mixlo_f16 %a, %b, %c
+      //~gfx9! v1: %tmp2 = v_fma_f32 %a, %b, %c
+      //~gfx9! v2b: %res2 = v_cvt_f16_f32 %tmp2
+      //~gfx10! v2b: %res2 = v_fma_mixlo_f16 %a, %b, %c
       //! p_unit_test 2, %res2
       writeout(2, f2f16(fma(a, b, c)));
 
@@ -1253,7 +1257,11 @@ BEGIN_TEST(optimize.mad_mix.output_conv.basic)
 
       //! v2b: %res5 = v_fma_mixlo_f16 %a, lo(%b16), %c
       //! p_unit_test 5, %res5
-      writeout(5, f2f16(fma(a, f2f32(b16), c)));
+      writeout(5, f2f16(fadd(fmul(a, f2f32(b16)), c)));
+
+      //! v2b: %res6 = v_fma_mixlo_f16 %a, %b, %c
+      //! p_unit_test 6, %res6
+      writeout(6, f2f16(fadd(fmul(a, b), c)));
 
       finish_opt_test();
    }