aco/optimizer: never unfuse fma
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

This shouldn't change anything in practice, and reducing precision by
unfusing an fma just because its definition isn't marked precise is weird.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38183>
This commit is contained in:
Georg Lehmann 2025-10-31 13:12:34 +01:00 committed by Marge Bot
parent 6610905b43
commit 22dc06798b
2 changed files with 14 additions and 7 deletions

View file

@@ -880,8 +880,7 @@ alu_opt_info_is_valid(opt_ctx& ctx, alu_opt_info& info)
info.operands[2].neg[0] = true;
break;
case aco_opcode::v_fma_f32:
// TODO remove precise, not clear why unfusing fma would be valid
if (!ctx.program->dev.fused_mad_mix && info.defs[0].isPrecise())
if (!ctx.program->dev.fused_mad_mix)
return false;
break;
case aco_opcode::v_mad_f32:
@@ -4370,7 +4369,7 @@ can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
case aco_opcode::v_subrev_f32:
case aco_opcode::v_mul_f32: return !instr->isSDWA() && !instr->isDPP();
case aco_opcode::v_fma_f32:
return ctx.program->dev.fused_mad_mix || !instr->definitions[0].isPrecise();
return ctx.program->dev.fused_mad_mix;
case aco_opcode::v_fma_mix_f32:
case aco_opcode::v_fma_mixlo_f16: return true;
default: return false;

View file

@@ -1050,9 +1050,11 @@ BEGIN_TEST(optimize.mad_mix.input_conv.basic)
//! v1: %res3 = v_fma_mix_f32 %a, %a, lo(%a16)
//! p_unit_test 3, %res3
writeout(3, fma(a, a, f2f32(a16)));
writeout(3, fadd(fmul(a, a), f2f32(a16)));
//! v1: %res4 = v_fma_mix_f32 %a, %a, lo(%a16)
//~gfx9! v1: %tmp4 = v_cvt_f32_f16 %a16
//~gfx9! v1: %res4 = v_fma_f32 %a, %a, %tmp4
//~gfx10! v1: %res4 = v_fma_mix_f32 %a, %a, lo(%a16)
//! p_unit_test 4, %res4
writeout(4, fma(a, a, f2f32(a16)));
@@ -1239,7 +1241,9 @@ BEGIN_TEST(optimize.mad_mix.output_conv.basic)
//! p_unit_test 1, %res1
writeout(1, f2f16(fadd(a, b)));
//! v2b: %res2 = v_fma_mixlo_f16 %a, %b, %c
//~gfx9! v1: %tmp2 = v_fma_f32 %a, %b, %c
//~gfx9! v2b: %res2 = v_cvt_f16_f32 %tmp2
//~gfx10! v2b: %res2 = v_fma_mixlo_f16 %a, %b, %c
//! p_unit_test 2, %res2
writeout(2, f2f16(fma(a, b, c)));
@@ -1253,7 +1257,11 @@ BEGIN_TEST(optimize.mad_mix.output_conv.basic)
//! v2b: %res5 = v_fma_mixlo_f16 %a, lo(%b16), %c
//! p_unit_test 5, %res5
writeout(5, f2f16(fma(a, f2f32(b16), c)));
writeout(5, f2f16(fadd(fmul(a, f2f32(b16)), c)));
//! v2b: %res6 = v_fma_mixlo_f16 %a, %b, %c
//! p_unit_test 6, %res6
writeout(6, f2f16(fadd(fmul(a, b), c)));
finish_opt_test();
}