mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
aco/tests: add tests for v_fma_f32 with 2 fp16 literals
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21633>
This commit is contained in:
parent
afb0289d7c
commit
736d6643bb
1 changed file with 111 additions and 0 deletions
|
|
@ -1836,3 +1836,114 @@ BEGIN_TEST(optimize.vop3p_constants)
|
|||
}
|
||||
}
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(optimize.fmamix_two_literals)
/* Exercises the cases where an fma (or a separate mul + add) has two 32-bit float literal
 * operands. The expected output shows them either folded into one 32-bit literal that carries
 * both values as fp16 halves (v_fma_mix_f32 with lo()/hi() operands) or left alone.
 * Each writeout(N, ...) is preceded by the check comments describing the expected result N.
 */
|
||||
/* This test has to recreate literals sometimes because we don't combine them at all if there's
|
||||
* at least one uncombined use.
|
||||
*/
|
||||
for (unsigned i = GFX10; i <= GFX10_3; i++) {
|
||||
//>> v1: %a, v1: %b = p_startpgm
|
||||
if (!setup_cs("v1 v1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp b = inputs[1];
|
||||
|
||||
Temp c15 = bld.copy(bld.def(v1), Operand::c32(fui(1.5f)));
|
||||
Temp c30 = bld.copy(bld.def(v1), Operand::c32(fui(3.0f)));
|
||||
/* 0x387fc000 is the fp32 encoding of the largest fp16 subnormal (fp16 bits 0x03ff). */
Temp c_denorm = bld.copy(bld.def(v1), Operand::c32(0x387fc000));
|
||||
|
||||
/* 0x42003e00 = fp16 3.0 (0x4200) in the high half, fp16 1.5 (0x3e00) in the low half. */
//! v1: %res0 = v_fma_mix_f32 %a, lo(0x42003e00), hi(0x42003e00)
|
||||
//! p_unit_test 0, %res0
|
||||
writeout(0, fma(a, c15, c30));
|
||||
|
||||
/* No need to use v_fma_mix_f32. */
|
||||
//! v1: %res1 = v_fmaak_f32 %a, %b, 0x40400000
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(1, fma(a, b, c30));
|
||||
|
||||
/* Separate mul/add can become v_fma_mix_f32 if it's not precise. */
|
||||
//! v1: %res2 = v_fma_mix_f32 %a, lo(0x42003e00), hi(0x42003e00)
|
||||
//! p_unit_test 2, %res2
|
||||
writeout(2, fadd(fmul(a, c15), c30));
|
||||
|
||||
/* Fresh copies of both literals for the precise cases below (see the note at the top of the
 * test about uncombined uses). Only gfx10 expects the 1.5 copy to remain in the output.
 */
//~gfx10! v1: %c15 = p_parallelcopy 0x3fc00000
|
||||
c15 = bld.copy(bld.def(v1), Operand::c32(fui(1.5f)));
|
||||
c30 = bld.copy(bld.def(v1), Operand::c32(fui(3.0f)));
|
||||
|
||||
/* v_fma_mix_f32 is a fused mul/add, so it can't be used for precise separate mul/add. */
|
||||
//~gfx10! v1: (precise)%res3 = v_madak_f32 %a, %c15, 0x40400000
|
||||
//~gfx10_3! v1: (precise)%res3_tmp = v_mul_f32 %a, 0x3fc00000
|
||||
//~gfx10_3! v1: %res3 = v_add_f32 %res3_tmp, 0x40400000
|
||||
//! p_unit_test 3, %res3
|
||||
writeout(3, fadd(bld.precise().vop2(aco_opcode::v_mul_f32, bld.def(v1), a, c15), c30));
|
||||
|
||||
/* Same as result 3, but with the precise flag on the add instead of the mul.
 * NOTE(review): %1 and %c16 in the gfx10 check below are not used by any earlier check in
 * this test; they may have been intended as %a and %c15 - confirm against the harness.
 */
//~gfx10! v1: (precise)%res4 = v_madak_f32 %1, %c16, 0x40400000
|
||||
//~gfx10_3! v1: %res4_tmp = v_mul_f32 %a, 0x3fc00000
|
||||
//~gfx10_3! v1: (precise)%res4 = v_add_f32 %res4_tmp, 0x40400000
|
||||
//! p_unit_test 4, %res4
|
||||
writeout(4, bld.precise().vop2(aco_opcode::v_add_f32, bld.def(v1), fmul(a, c15), c30));
|
||||
|
||||
/* Can't convert to fp16 if it will be flushed as a denormal. */
/* Result 5 still expects packing: 0x03ff3e00 holds the subnormal 0x03ff in the high half and
 * fp16 1.5 in the low half. The rejected case is result 6, checked after denorm16_64 is set to
 * fp_denorm_flush.
 * NOTE(review): %1 in the check below may have been intended as %a - confirm.
 */
|
||||
//! v1: %res5 = v_fma_mix_f32 %1, lo(0x3ff3e00), hi(0x3ff3e00)
|
||||
//! p_unit_test 5, %res5
|
||||
c15 = bld.copy(bld.def(v1), Operand::c32(fui(1.5f)));
|
||||
writeout(5, fma(a, c15, c_denorm));
|
||||
|
||||
/* Switch next_fp_mode.denorm16_64 to fp_denorm_flush and continue in a newly created block;
 * all remaining results are built there.
 */
//>> BB1
|
||||
//! /* logical preds: / linear preds: / kind: uniform, */
|
||||
program->next_fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
bld.reset(program->create_and_insert_block());
|
||||
|
||||
//~gfx10; del c15
|
||||
//! v1: %c15 = p_parallelcopy 0x3fc00000
|
||||
//! v1: %res6 = v_fmaak_f32 %a, %c15, 0x387fc000
|
||||
//! p_unit_test 6, %res6
|
||||
c15 = bld.copy(bld.def(v1), Operand::c32(fui(1.5f)));
|
||||
c_denorm = bld.copy(bld.def(v1), Operand::c32(0x387fc000));
|
||||
writeout(6, fma(a, c15, c_denorm));
|
||||
|
||||
/* Can't accept more than 2 unique fp16 literals: with 1.5, 3.0 and 4.5 as operands, only the
 * first two are packed and 4.5 stays in %c45.
 */
|
||||
//! v1: %c45 = p_parallelcopy 0x40900000
|
||||
//! v1: %res7 = v_fma_mix_f32 lo(0x42003e00), hi(0x42003e00), %c45
|
||||
//! p_unit_test 7, %res7
|
||||
Temp c45 = bld.copy(bld.def(v1), Operand::c32(fui(4.5f)));
|
||||
writeout(7, fma(c15, c30, c45));
|
||||
|
||||
/* Modifiers must be preserved. */
/* 0x44804200 = fp16 4.5 (0x4480) in the high half, fp16 3.0 (0x4200) in the low half.
 * Results 8, 9 and 10 cover neg, abs and clamp respectively.
 */
|
||||
//! v1: %res8 = v_fma_mix_f32 -%a, lo(0x44804200), hi(0x44804200)
|
||||
//! p_unit_test 8, %res8
|
||||
writeout(8, fma(fneg(a), c30, c45));
|
||||
|
||||
//! v1: %res9 = v_fma_mix_f32 lo(0x44804200), |%a|, hi(0x44804200)
|
||||
//! p_unit_test 9, %res9
|
||||
writeout(9, fma(c30, fabs(a), c45));
|
||||
|
||||
//! v1: %res10 = v_fma_mix_f32 %a, lo(0x44804200), hi(0x44804200) clamp
|
||||
//! p_unit_test 10, %res10
|
||||
writeout(10, fsat(fma(a, c30, c45)));
|
||||
|
||||
/* Output modifiers are not supported by v_fma_mix_f32. */
/* The multiply by 0x3f000000 (0.5f) is expected to show up as the *0.5 output modifier on a
 * plain v_fma_f32, with 4.5 left in %c45 rather than packed.
 */
|
||||
c30 = bld.copy(bld.def(v1), Operand::c32(fui(3.0f)));
|
||||
//; del c45
|
||||
//! v1: %c45 = p_parallelcopy 0x40900000
|
||||
//! v1: %res11 = v_fma_f32 %a, 0x40400000, %c45 *0.5
|
||||
//! p_unit_test 11, %res11
|
||||
c45 = bld.copy(bld.def(v1), Operand::c32(fui(4.5f)));
|
||||
writeout(11, fmul(fma(a, c30, c45), bld.copy(bld.def(v1), Operand::c32(0x3f000000))));
|
||||
|
||||
/* Has a literal which can't be represented as fp16. */
|
||||
//! v1: %c03 = p_parallelcopy 0x3e99999a
|
||||
//! v1: %res12 = v_fmaak_f32 %a, %c03, 0x40400000
|
||||
//! p_unit_test 12, %res12
|
||||
Temp c03 = bld.copy(bld.def(v1), Operand::c32(fui(0.3f)));
|
||||
writeout(12, fma(a, c03, c30));
|
||||
|
||||
/* We should still use fmaak/fmamk if the two literals are identical. */
|
||||
//! v1: %res13 = v_fmaak_f32 0x40400000, %a, 0x40400000
|
||||
//! p_unit_test 13, %res13
|
||||
writeout(13, fma(a, c30, c30));
|
||||
|
||||
finish_opt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue