aco/optimizer: create fma with s_mul_f32/f16

Foz-DB Navi48:
Totals from 14473 (17.56% of 82419) affected shaders:
MaxWaves: 397738 -> 397720 (-0.00%); split: +0.00%, -0.01%
Instrs: 22133626 -> 21984649 (-0.67%); split: -0.68%, +0.01%
CodeSize: 117440104 -> 117111440 (-0.28%); split: -0.30%, +0.02%
VGPRs: 825820 -> 825928 (+0.01%); split: -0.01%, +0.02%
SpillSGPRs: 15496 -> 15512 (+0.10%); split: -0.19%, +0.29%
Latency: 152141755 -> 152058676 (-0.05%); split: -0.07%, +0.02%
InvThroughput: 25715152 -> 25681160 (-0.13%); split: -0.14%, +0.01%
VClause: 402752 -> 400798 (-0.49%); split: -0.53%, +0.04%
SClause: 587448 -> 586772 (-0.12%); split: -0.19%, +0.07%
Copies: 1650891 -> 1661495 (+0.64%); split: -0.14%, +0.78%
Branches: 541341 -> 541334 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 748235 -> 748332 (+0.01%); split: -0.03%, +0.04%
VALU: 11754090 -> 11755396 (+0.01%); split: -0.01%, +0.02%
SALU: 3659133 -> 3536435 (-3.35%); split: -3.36%, +0.01%
VOPD: 17201 -> 17083 (-0.69%); split: +0.05%, -0.74%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38150>
This commit is contained in:
Georg Lehmann 2024-12-12 19:35:01 +01:00 committed by Marge Bot
parent 5abc961514
commit 2c05aa34aa

View file

@@ -5089,8 +5089,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
add_opt(v_mul_f32, v_mad_f32, 0x3, "120");
add_opt(v_mul_legacy_f32, v_mad_legacy_f32, 0x3, "120");
}
if (ctx.program->dev.has_fast_fma32)
if (ctx.program->dev.has_fast_fma32) {
add_opt(v_mul_f32, v_fma_f32, 0x3, "120", create_fma_cb);
add_opt(s_mul_f32, v_fma_f32, 0x3, "120", create_fma_cb);
}
if (ctx.program->gfx_level >= GFX10_3)
add_opt(v_mul_legacy_f32, v_fma_legacy_f32, 0x3, "120", create_fma_cb);
} else if (info.opcode == aco_opcode::v_add_f16) {
@@ -5099,14 +5101,20 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
else if (ctx.program->gfx_level < GFX10 && ctx.fp_mode.denorm16_64 == 0)
add_opt(v_mul_f16, v_mad_f16, 0x3, "120");
if (ctx.program->gfx_level < GFX9)
if (ctx.program->gfx_level < GFX9) {
add_opt(v_mul_f16, v_fma_legacy_f16, 0x3, "120", create_fma_cb);
else
} else {
add_opt(v_mul_f16, v_fma_f16, 0x3, "120", create_fma_cb);
add_opt(s_mul_f16, v_fma_f16, 0x3, "120", create_fma_cb);
}
} else if (info.opcode == aco_opcode::v_add_f64) {
add_opt(v_mul_f64, v_fma_f64, 0x3, "120", create_fma_cb);
} else if (info.opcode == aco_opcode::v_add_f64_e64) {
add_opt(v_mul_f64_e64, v_fma_f64, 0x3, "120", create_fma_cb);
} else if (info.opcode == aco_opcode::s_add_f32) {
add_opt(s_mul_f32, s_fmac_f32, 0x3, "120", create_fma_cb);
} else if (info.opcode == aco_opcode::s_add_f16) {
add_opt(s_mul_f16, s_fmac_f16, 0x3, "120", create_fma_cb);
}
if (match_and_apply_patterns(ctx, info, patterns)) {