aco/optimizer: never unfuse fma

This shouldn't change anything in practice, and unfusing an fma (which reduces
precision) just because the result isn't marked precise is weird.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38183>
2026-01-03 22:20:09 +01:00 · 2025-10-31 13:12:34 +01:00 · 2025-10-31 13:12:34 +01:00 · 22dc06798b
commit 22dc06798b
parent 6610905b43
2 changed files with 14 additions and 7 deletions
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -880,8 +880,7 @@ alu_opt_info_is_valid(opt_ctx& ctx, alu_opt_info& info)
         info.operands[2].neg[0] = true;
         break;
      case aco_opcode::v_fma_f32:
-         // TODO remove precise, not clear why unfusing fma would be valid
-         if (!ctx.program->dev.fused_mad_mix && info.defs[0].isPrecise())
+         if (!ctx.program->dev.fused_mad_mix)
            return false;
         break;
      case aco_opcode::v_mad_f32:
@@ -4370,7 +4369,7 @@ can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
   case aco_opcode::v_subrev_f32:
   case aco_opcode::v_mul_f32: return !instr->isSDWA() && !instr->isDPP();
   case aco_opcode::v_fma_f32:
-      return ctx.program->dev.fused_mad_mix || !instr->definitions[0].isPrecise();
+      return ctx.program->dev.fused_mad_mix;
   case aco_opcode::v_fma_mix_f32:
   case aco_opcode::v_fma_mixlo_f16: return true;
   default: return false;
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -1050,9 +1050,11 @@ BEGIN_TEST(optimize.mad_mix.input_conv.basic)

      //! v1: %res3 = v_fma_mix_f32 %a, %a, lo(%a16)
      //! p_unit_test 3, %res3
-      writeout(3, fma(a, a, f2f32(a16)));
+      writeout(3, fadd(fmul(a, a), f2f32(a16)));

-      //! v1: %res4 = v_fma_mix_f32 %a, %a, lo(%a16)
+      //~gfx9! v1: %tmp4 = v_cvt_f32_f16 %a16
+      //~gfx9! v1: %res4 = v_fma_f32 %a, %a, %tmp4
+      //~gfx10! v1: %res4 = v_fma_mix_f32 %a, %a, lo(%a16)
      //! p_unit_test 4, %res4
      writeout(4, fma(a, a, f2f32(a16)));

@@ -1239,7 +1241,9 @@ BEGIN_TEST(optimize.mad_mix.output_conv.basic)
      //! p_unit_test 1, %res1
      writeout(1, f2f16(fadd(a, b)));

-      //! v2b: %res2 = v_fma_mixlo_f16 %a, %b, %c
+      //~gfx9! v1: %tmp2 = v_fma_f32 %a, %b, %c
+      //~gfx9! v2b: %res2 = v_cvt_f16_f32 %tmp2
+      //~gfx10! v2b: %res2 = v_fma_mixlo_f16 %a, %b, %c
      //! p_unit_test 2, %res2
      writeout(2, f2f16(fma(a, b, c)));

@@ -1253,7 +1257,11 @@ BEGIN_TEST(optimize.mad_mix.output_conv.basic)

      //! v2b: %res5 = v_fma_mixlo_f16 %a, lo(%b16), %c
      //! p_unit_test 5, %res5
-      writeout(5, f2f16(fma(a, f2f32(b16), c)));
+      writeout(5, f2f16(fadd(fmul(a, f2f32(b16)), c)));
+
+      //! v2b: %res6 = v_fma_mixlo_f16 %a, %b, %c
+      //! p_unit_test 6, %res6
+      writeout(6, f2f16(fadd(fmul(a, b), c)));

      finish_opt_test();
   }