aco/tests: add tests for post-RA DPP combining

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11924>
This commit is contained in:
Rhys Perry 2021-07-15 17:46:40 +01:00 committed by Marge Bot
parent 12be7c8feb
commit 4a7714ab7b

View file

@ -276,3 +276,121 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
finish_optimizer_postRA_test();
END_TEST
BEGIN_TEST(optimizer_postRA.dpp)
   /* Exercises combining of a DPP v_mov_b32 into the instruction that reads its
    * result. Each numbered case builds a small sequence and the check-pattern
    * comments directly above it give the expected output: either one combined
    * instruction carrying the DPP control, or both instructions left as built.
    */
   //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm
   if (!setup_cs("v1 v1 s2 s2", GFX10_3))
      return;

   /* Pin the four p_startpgm definitions to the registers named in the pattern
    * above (256 and 257 print as v[0] and v[1], 0 as s[0-1]).
    */
   bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
   bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
   bld.instructions->at(0)->definitions[2].setFixed(vcc);
   bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0));

   PhysReg reg_v0(256);
   PhysReg reg_v2(258);

   /* Operands reading the inputs from the registers fixed above. */
   Operand a(inputs[0], PhysReg(256));
   Operand b(inputs[1], PhysReg(257));
   Operand c(inputs[2], vcc);
   Operand d(inputs[3], PhysReg(0));

   /* basic optimization */
   /* The mov result is the first source of the add; a single add is expected. */
   //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
   //! p_unit_test 0, %res0:v[2]
   Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
   writeout(0, Operand(res0, reg_v2));

   /* operand swapping */
   /* The mov result is the second source of v_sub_f32; the expected output is
    * v_subrev_f32 with the operands exchanged so that %a comes first.
    */
   //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
   //! p_unit_test 1, %res1:v[2]
   Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2));
   writeout(1, Operand(res1, reg_v2));

   /* The consumer is itself built as a DPP instruction (row_half_mirror); both
    * instructions are expected to remain unchanged.
    */
   //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
   //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
   //! p_unit_test 2, %res2:v[2]
   Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
   writeout(2, Operand(res2, reg_v2));

   /* modifiers */
   /* neg set on the mov's source: expected to show up as -%a on the add. */
   //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
   //! p_unit_test 3, %res3:v[2]
   auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   tmp3.instr->dpp().neg[0] = true;
   Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b);
   writeout(3, Operand(res3, reg_v2));

   /* neg set on the add's first source instead: same expected output as case 3. */
   //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
   //! p_unit_test 4, %res4:v[2]
   Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
   res4.instr->vop3().neg[0] = true;
   writeout(4, Operand(res4, reg_v2));

   /* clamp set on the add: both instructions are expected to remain. */
   //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
   //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp
   //! p_unit_test 5, %res5:v[2]
   Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
   res5.instr->vop3().clamp = true;
   writeout(5, Operand(res5, reg_v2));

   /* neg on the mov's source plus abs on the add's first source: the expected
    * combined operand is |%a| with no negation.
    */
   //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
   //! p_unit_test 6, %res6:v[2]
   auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   tmp6.instr->dpp().neg[0] = true;
   auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
   res6.instr->vop3().abs[0] = true;
   writeout(6, Operand(res6, reg_v2));

   /* abs on the first source (%b) of a v_sub_f32 whose second source is the mov
    * result: after the swap to v_subrev_f32 the abs is expected to stay with %b.
    */
   //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1
   //! p_unit_test 7, %res7:v[2]
   Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
   res7.instr->vop3().abs[0] = true;
   writeout(7, Operand(res7, reg_v2));

   /* vcc */
   /* v_cndmask_b32 with its third operand in vcc: a combined instruction is expected. */
   //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
   //! p_unit_test 8, %res8:v[2]
   Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
   writeout(8, Operand(res8, reg_v2));

   /* Same as case 8 but with the third operand in s[0-1] instead of vcc: both
    * instructions are expected to remain.
    */
   //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
   //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
   //! p_unit_test 9, %res9:v[2]
   Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
   writeout(9, Operand(res9, reg_v2));

   /* control flow */
   /* The mov is emitted in block 0 and the add in a new block 1 that is linked as
    * block 0's linear successor; the combined add is expected in BB1.
    */
   //! BB1
   //! /* logical preds: / linear preds: BB0, / kind: uniform, */
   //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
   //! p_unit_test 10, %res10:v[2]
   Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);

   bld.reset(program->create_and_insert_block());
   program->blocks[0].linear_succs.push_back(1);
   program->blocks[1].linear_preds.push_back(0);

   Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b);
   writeout(10, Operand(res10, reg_v2));

   /* can't combine if the v_mov_b32's operand is modified */
   /* v[0], the mov's source register, is overwritten between the mov and the add.
    * The last pattern refers back to %res11 so that it checks p_unit_test reads
    * the add's result rather than capturing a new, unconstrained name.
    */
   //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
   //! v1: %tmp11_2:v[0] = v_mov_b32 0
   //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1]
   //! p_unit_test 11, %res11:v[2], %tmp11_2:v[0]
   Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
   Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0));
   Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b);
   writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0));

   finish_optimizer_postRA_test();
END_TEST