aco: disallow various v_add_u32 opts if modifiers are used

Check for clamp, SDWA or DPP. The optimization isn't possible with SDWA
and DPP, so it would have been skipped anyway. Doing any of these with a
clamp modifier present would be incorrect.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>
(cherry picked from commit 966732e8ca)
This commit is contained in:
Rhys Perry 2020-10-07 11:45:30 +01:00 committed by Dylan Baker
parent 46ab4f9171
commit 004b8b105f
3 changed files with 30 additions and 2 deletions

View file

@ -220,7 +220,7 @@
"description": "aco: disallow various v_add_u32 opts if modifiers are used",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"master_sha": null,
"because_sha": null
},

View file

@ -2848,7 +2848,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
else combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32, "012", 1 | 2);
} else if (instr->opcode == aco_opcode::v_add_u32) {
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
else if (ctx.program->chip_class >= GFX9) {
else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ;

View file

@ -223,3 +223,31 @@ BEGIN_TEST(optimize.const_comparison_ordering)
finish_opt_test();
END_TEST
/* Optimizer test for chained v_add_u32 on GFX9: two plain adds are expected to
 * be combined into a single v_add3_u32, while the combination must be left
 * alone when either of the two adds has the clamp modifier set.
 * NOTE: the //>> and //! lines below are expected-output patterns, not
 * ordinary comments; explanatory notes here deliberately use block comments. */
BEGIN_TEST(optimize.add3)
//>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
/* Three v1 inputs (matched as %a, %b, %c above); bail out if setup fails. */
if (!setup_cs("v1 v1 v1", GFX9))
return;
/* Case 0: neither add uses a modifier, so a + (b + c) becomes v_add3_u32. */
//! v1: %res0 = v_add3_u32 %a, %b, %c
//! p_unit_test 0, %res0
Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
/* Case 1: the inner add (b + c) is clamped; both adds are expected to remain. */
//! v1: %tmp1 = v_add_u32 %b, %c clamp
//! v1: %res1 = v_add_u32 %a, %tmp1
//! p_unit_test 1, %res1
/* Built via vop2_e64 and accessed as a VOP3A_instruction to set its clamp flag. */
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
/* Case 2: the outer add (a + tmp) is clamped; both adds are expected to remain. */
//! v1: %tmp2 = v_add_u32 %b, %c
//! v1: %res2 = v_add_u32 %a, %tmp2 clamp
//! p_unit_test 2, %res2
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
/* tmp is reused: it now refers to the outer, e64-encoded add that gets the clamp. */
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
writeout(2, tmp);
finish_opt_test();
END_TEST