diff --git a/.pick_status.json b/.pick_status.json index 58f380d4504..def42b7c883 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -22,7 +22,7 @@ "description": "aco: don't create DPP instructions with SGPR operands", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "2e6834d4f6c9985bcaedd5ebc35ac5afc93c8f6f" }, diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 5923fea9589..156227a345c 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -318,6 +318,8 @@ can_use_DPP(const aco_ptr& instr, bool pre_ra) return false; if (instr->format == Format::VOP3) return false; + if (instr->operands.size() > 1 && !instr->operands[1].isOfType(RegType::vgpr)) + return false; } /* there are more cases but those all take 64-bit inputs */ diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index 31a229f99e9..f33de4caf76 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -954,13 +954,14 @@ BEGIN_TEST(optimize.denorm_propagation) END_TEST BEGIN_TEST(optimizer.dpp) - //>> v1: %a, v1: %b, s2: %c = p_startpgm - if (!setup_cs("v1 v1 s2", GFX10_3)) + //>> v1: %a, v1: %b, s2: %c, s1: %d = p_startpgm + if (!setup_cs("v1 v1 s2 s1", GFX10_3)) return; Operand a(inputs[0]); Operand b(inputs[1]); Operand c(inputs[2]); + Operand d(inputs[3]); /* basic optimization */ //! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1 @@ -1028,6 +1029,21 @@ BEGIN_TEST(optimizer.dpp) Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp8, b, c); writeout(8, res8); + /* sgprs */ + //! v1: %tmp9 = v_mov_b32 %a row_mirror bound_ctrl:1 + //! v1: %res9 = v_add_f32 %tmp9, %d + //! p_unit_test 9, %res9 + Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); + Temp res9 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp9, d); + writeout(9, res9); + + //! v1: %tmp10 = v_mov_b32 %a row_mirror bound_ctrl:1 + //! v1: %res10 = v_add_f32 %d, %tmp10 + //! p_unit_test 10, %res10 + Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror); + Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), d, tmp10); + writeout(10, res10); + finish_opt_test(); END_TEST