diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index 7a026cb7dd8..7334182d11f 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -242,6 +242,14 @@ finish_to_hw_instr_test() aco_print_program(program.get(), output); } +void +finish_schedule_vopd_test() /* Test epilogue: calls finish_program(), then aco::schedule_vopd(), then prints the resulting program to output. */ +{ + finish_program(program.get()); + aco::schedule_vopd(program.get()); + aco_print_program(program.get(), output); +} + void finish_waitcnt_test() { diff --git a/src/amd/compiler/tests/helpers.h b/src/amd/compiler/tests/helpers.h index 69ae63f8de8..1d54ec40e86 100644 --- a/src/amd/compiler/tests/helpers.h +++ b/src/amd/compiler/tests/helpers.h @@ -85,6 +85,7 @@ void finish_setup_reduce_temp_test(); void finish_ra_test(aco::ra_test_policy, bool lower = false); void finish_optimizer_postRA_test(); void finish_to_hw_instr_test(); +void finish_schedule_vopd_test(); /* Finishes the program, runs aco::schedule_vopd on it, then prints it. */ void finish_waitcnt_test(); void finish_insert_nops_test(bool endpgm = true); void finish_form_hard_clause_test(); diff --git a/src/amd/compiler/tests/meson.build b/src/amd/compiler/tests/meson.build index 7e1aaf7cb1e..b763457423d 100644 --- a/src/amd/compiler/tests/meson.build +++ b/src/amd/compiler/tests/meson.build @@ -33,6 +33,7 @@ aco_tests_files = files( 'test_reduce_assign.cpp', 'test_regalloc.cpp', 'test_optimizer_postRA.cpp', + 'test_scheduler.cpp', 'test_sdwa.cpp', 'test_to_hw_instr.cpp', 'test_tests.cpp', diff --git a/src/amd/compiler/tests/test_scheduler.cpp b/src/amd/compiler/tests/test_scheduler.cpp new file mode 100644 index 00000000000..7f7722c5e4b --- /dev/null +++ b/src/amd/compiler/tests/test_scheduler.cpp @@ -0,0 +1,149 @@ +/* + * Copyright 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "helpers.h" + +using namespace aco; + +BEGIN_TEST(vopd_sched.commutative) /* Each numbered block emits a p_unit_test marker and then two VOP2 instructions that read v[2] and v[3]. The check comments ahead of each block hold the expected printout (presumably matched by the test harness against what finish_schedule_vopd_test() prints): either one fused v_dual_* pair, with sources or opcode swapped where shown, or the two instructions left unfused. */ + if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32)) + return; /* setup_cs() returned false, so there is nothing to run */ + + PhysReg reg_v0{256}; /* reg_v0..reg_v3 (PhysReg 256..259) correspond to v[0]..v[3] in the expected output */ + PhysReg reg_v1{257}; + PhysReg reg_v2{258}; + PhysReg reg_v3{259}; + + //>> p_unit_test 0 + 
//! v1: %0:v[1] = v_dual_add_f32 %0:v[3], %0:v[2] :: v1: %0:v[0] = v_dual_mul_f32 %0:v[2], %0:v[3] + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); /* Expected: the pair is fused and the v_add_f32 sources come out swapped (v[3], v[2]). */ + bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + bld.vop2(aco_opcode::v_add_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + + /* Neither of these opcodes is commutative, so the expected output keeps both instructions unfused. */ + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 1 + //! v1: %0:v[0] = v_fmamk_f32 %0:v[2], %0:v[3], 0 + //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); + bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1), Operand::zero()); + bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + + /* We have to change the opcode for subtractions: with the sources swapped, v_sub_f32 is expected as v_dual_subrev_f32 (block 2) and v_subrev_f32 as v_dual_sub_f32 (block 3). */ + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 2 + //! v1: %0:v[1] = v_dual_subrev_f32 %0:v[3], %0:v[2] :: v1: %0:v[0] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); + bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1), Operand::zero()); + bld.vop2(aco_opcode::v_sub_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 3 + //! v1: %0:v[0] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[1] = v_dual_sub_f32 %0:v[3], %0:v[2] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); /* Mirror of block 2: v_subrev_f32 is emitted first and is expected back as v_dual_sub_f32 with swapped sources. */ + bld.vop2(aco_opcode::v_subrev_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1), Operand::zero()); + + /* If we have to move the second instruction into OPY instead of OPX, then swapping must still be + * correct. 
*/ + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 4 + //! v1: %0:v[0] = v_dual_mul_f32 %0:v[3], %0:v[2] :: v1: %0:v[1] = v_dual_lshlrev_b32 %0:v[2], %0:v[3] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); /* Expected: v_mul_f32 gets its sources swapped while v_lshlrev_b32 keeps its operand order. */ + bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 5 + //! v1: %0:v[0] = v_dual_mul_f32 %0:v[3], %0:v[2] :: v1: %0:v[1] = v_dual_lshlrev_b32 %0:v[2], %0:v[3] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); /* Same pair emitted in the opposite order; the expected output is identical to block 4. */ + bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + + finish_schedule_vopd_test(); /* runs aco::schedule_vopd and prints the program for checking */ +END_TEST + +BEGIN_TEST(vopd_sched.mov_to_add) /* Pairs a v_mov_b32 with a second instruction in each block. The expected output shows where the mov is rewritten to v_dual_add_nc_u32 0, src, where it stays v_dual_mov_b32, and where the two instructions are not fused at all. */ + if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32)) + return; /* setup_cs() returned false, so there is nothing to run */ + + PhysReg reg_v0{256}; /* reg_v0..reg_v3 (PhysReg 256..259) correspond to v[0]..v[3] in the expected output */ + PhysReg reg_v1{257}; + PhysReg reg_v2{258}; + PhysReg reg_v3{259}; + + //>> p_unit_test 0 + //! v1: %0:v[1] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2] + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); /* Two movs from v[2]: the expected pair keeps one as v_dual_mov_b32 and turns the other into v_dual_add_nc_u32 0, v[2]. */ + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand(reg_v2, v1)); + + /* We can't turn the v_mov_b32 into a v_add_u32 because then both instructions would be OPY-only. + */ + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 1 + //! v1: %0:v[0] = v_mov_b32 %0:v[2] + //! 
v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); + bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 2 + //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3] + //! v1: %0:v[0] = v_mov_b32 %0:v[2] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); /* Same pair as block 1 in the opposite order; still expected to stay unfused. */ + bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); + + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 3 + //! v1: %0:v[0] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[1] = v_dual_and_b32 %0:v[3], %0:v[2] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); /* Expected: the mov stays v_dual_mov_b32 and v_and_b32 is fused with its sources swapped. */ + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); + bld.vop2(aco_opcode::v_and_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 4 + //! v1: %0:v[0] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[1] = v_dual_and_b32 %0:v[3], %0:v[2] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); /* Opposite emission order; the expected output is identical to block 3. */ + bld.vop2(aco_opcode::v_and_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1)); + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); + + /* The v_add_u32 should be OPY, not OPX. */ + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 5 + //! 
v1: %0:v[1] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); /* Expected: the mov becomes v_dual_add_nc_u32 0, v[2] and is printed second, after the v_dual_fmamk_f32. */ + bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1), Operand::zero()); + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); + + bld.reset(program->create_and_insert_block()); + //>> p_unit_test 6 + //! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); /* Opposite emission order; the expected output is identical to block 5. */ + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); + bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), + Operand(reg_v3, v1), Operand::zero()); + + finish_schedule_vopd_test(); /* runs aco::schedule_vopd and prints the program for checking */ +END_TEST