mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 02:10:11 +01:00
aco: disallow various v_add_u32 opts if modifiers are used
Check for clamp, SDWA or DPP. The optimization isn't possible with SDWA and DPP, so it would have been skipped anyway. Doing any of these optimizations with a clamp modifier present would be incorrect.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>
This commit is contained in:
parent
91ffeed88a
commit
966732e8ca
2 changed files with 29 additions and 1 deletions
|
|
@@ -2907,7 +2907,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
} else if (instr->opcode == aco_opcode::v_add_u32) {
|
||||
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
|
||||
else if (combine_add_bcnt(ctx, instr)) ;
|
||||
else if (ctx.program->chip_class >= GFX9) {
|
||||
else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
|
||||
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
|
||||
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
|
||||
else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32, "012", 1 | 2)) ;
|
||||
|
|
|
|||
|
|
@@ -365,3 +365,31 @@ BEGIN_TEST(optimize.const_comparison_ordering)
|
|||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
/* Optimizer test for folding two chained v_add_u32 into one v_add3_u32 on GFX9.
 *
 * Three cases are written out, numbered by the first argument of writeout():
 *   0: neither add has a modifier -> a single v_add3_u32 is expected.
 *   1: the inner add has clamp    -> both v_add_u32 are expected to remain.
 *   2: the outer add has clamp    -> both v_add_u32 are expected to remain.
 *
 * NOTE(review): the lines starting with //>> and //! look like expected-output
 * patterns consumed by the test harness rather than ordinary comments; do not
 * reword them without confirming how the harness reads this file.
 */
BEGIN_TEST(optimize.add3)
|
||||
//>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
|
||||
/* Requests three v1 inputs for GFX9; presumably a false return means the
 * test cannot be set up and should be skipped - TODO confirm. */
if (!setup_cs("v1 v1 v1", GFX9))
|
||||
return;
|
||||
|
||||
/* Case 0: a + (b + c) with no modifiers on either add. */
//! v1: %res0 = v_add3_u32 %a, %b, %c
|
||||
//! p_unit_test 0, %res0
|
||||
Builder::Result tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
writeout(0, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
|
||||
|
||||
/* Case 1: the inner add (b + c) is built with vop2_e64 and then has its
 * clamp flag set through a VOP3A_instruction cast; the expected output keeps
 * it as a separate clamped v_add_u32 feeding the outer add. */
//! v1: %tmp1 = v_add_u32 %b, %c clamp
|
||||
//! v1: %res1 = v_add_u32 %a, %tmp1
|
||||
//! p_unit_test 1, %res1
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
|
||||
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
|
||||
|
||||
/* Case 2: the clamp flag is set on the outer add (a + tmp) instead; the
 * expected output again keeps two v_add_u32, with clamp on the outer one. */
//! v1: %tmp2 = v_add_u32 %b, %c
|
||||
//! v1: %res2 = v_add_u32 %a, %tmp2 clamp
|
||||
//! p_unit_test 2, %res2
|
||||
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
|
||||
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
|
||||
writeout(2, tmp);
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue