diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp
index 9887ab60d2c..f0345296fc4 100644
--- a/src/amd/compiler/tests/test_optimizer_postRA.cpp
+++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp
@@ -276,3 +276,121 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
 
    finish_optimizer_postRA_test();
 END_TEST
+
+/* Tests for combining a v_mov_b32_dpp into its VALU reader in the post-RA
+ * optimizer: basic combine, operand swapping (sub -> subrev), neg/abs/clamp
+ * modifier handling, vcc/non-vcc cndmask, cross-block reads, and the case
+ * where the mov's source register is overwritten before the read.
+ */
+BEGIN_TEST(optimizer_postRA.dpp)
+   //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm
+   if (!setup_cs("v1 v1 s2 s2", GFX10_3))
+      return;
+
+   /* Pin the startpgm definitions to fixed registers so the checks above match. */
+   bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
+   bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
+   bld.instructions->at(0)->definitions[2].setFixed(vcc);
+   bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0));
+
+   PhysReg reg_v0(256);
+   PhysReg reg_v2(258);
+   Operand a(inputs[0], PhysReg(256));
+   Operand b(inputs[1], PhysReg(257));
+   Operand c(inputs[2], vcc);
+   Operand d(inputs[3], PhysReg(0));
+
+   /* basic optimization */
+   //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
+   //! p_unit_test 0, %res0:v[2]
+   Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
+   writeout(0, Operand(res0, reg_v2));
+
+   /* operand swapping */
+   //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
+   //! p_unit_test 1, %res1:v[2]
+   Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2));
+   writeout(1, Operand(res1, reg_v2));
+
+   /* can't combine when the reader is itself a DPP instruction */
+   //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+   //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
+   //! p_unit_test 2, %res2:v[2]
+   Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
+   writeout(2, Operand(res2, reg_v2));
+
+   /* modifiers */
+   //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
+   //! p_unit_test 3, %res3:v[2]
+   auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   tmp3.instr->dpp().neg[0] = true;
+   Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b);
+   writeout(3, Operand(res3, reg_v2));
+
+   //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
+   //! p_unit_test 4, %res4:v[2]
+   Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
+   res4.instr->vop3().neg[0] = true;
+   writeout(4, Operand(res4, reg_v2));
+
+   /* clamp on the VOP3 reader prevents the combine */
+   //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+   //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp
+   //! p_unit_test 5, %res5:v[2]
+   Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
+   res5.instr->vop3().clamp = true;
+   writeout(5, Operand(res5, reg_v2));
+
+   /* abs on the reader discards the mov's neg: |-x| == |x| */
+   //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
+   //! p_unit_test 6, %res6:v[2]
+   auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   tmp6.instr->dpp().neg[0] = true;
+   auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
+   res6.instr->vop3().abs[0] = true;
+   writeout(6, Operand(res6, reg_v2));
+
+   //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1
+   //! p_unit_test 7, %res7:v[2]
+   Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
+   res7.instr->vop3().abs[0] = true;
+   writeout(7, Operand(res7, reg_v2));
+
+   /* vcc */
+   //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
+   //! p_unit_test 8, %res8:v[2]
+   Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
+   writeout(8, Operand(res8, reg_v2));
+
+   /* cndmask with a non-vcc selector can't use the VOP2 DPP encoding */
+   //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+   //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
+   //! p_unit_test 9, %res9:v[2]
+   Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
+   writeout(9, Operand(res9, reg_v2));
+
+   /* control flow */
+   //! BB1
+   //! /* logical preds: / linear preds: BB0, / kind: uniform, */
+   //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
+   //! p_unit_test 10, %res10:v[2]
+   Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+
+   bld.reset(program->create_and_insert_block());
+   program->blocks[0].linear_succs.push_back(1);
+   program->blocks[1].linear_preds.push_back(0);
+
+   Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b);
+   writeout(10, Operand(res10, reg_v2));
+
+   /* can't combine if the v_mov_b32's operand is modified */
+   //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+   //! v1: %tmp11_2:v[0] = v_mov_b32 0
+   //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1]
+   //! p_unit_test 11, %res11:v[2], %tmp11_2:v[0]
+   Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0));
+   Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b);
+   writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0));
+
+   finish_optimizer_postRA_test();
+END_TEST