diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 2713cf5a611..e2faeee42af 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -4594,9 +4594,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
               instr->opcode == aco_opcode::v_subrev_co_u32 ||
               instr->opcode == aco_opcode::v_subrev_co_u32_e64) {
       combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1);
-   } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
-      combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
-                            2);
    } else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
       if (!combine_salu_not_bitwise(ctx, instr))
          combine_inverse_comparison(ctx, instr);
@@ -4818,6 +4815,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       add_opt(s_lshl_b32, s_lshl2_add_u32, 0x3, "102", remove_const_cb<2>);
       add_opt(s_lshl_b32, s_lshl3_add_u32, 0x3, "102", remove_const_cb<3>);
       add_opt(s_lshl_b32, s_lshl4_add_u32, 0x3, "102", remove_const_cb<4>);
+   } else if (info.opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
+      add_opt(v_add_u32, v_add_lshl_u32, 0x2, "120", nullptr, true);
    }
 
    if (match_and_apply_patterns(ctx, info, patterns)) {