diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 493738a8849..71f827fd645 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2389,16 +2389,14 @@ combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    return true;
 }
 
-/* s_andn2(exec, cmp(a, b)) -> get_inverse(cmp)(a, b) */
+/* s_not(cmp(a, b)) -> get_inverse(cmp)(a, b) */
 bool
 combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 {
-   if (!instr->operands[0].isFixed() || instr->operands[0].physReg() != exec)
-      return false;
-
    if (ctx.uses[instr->definitions[1].tempId()])
       return false;
 
-   Instruction* cmp = follow_operand(ctx, instr->operands[1]);
+   Instruction* cmp = follow_operand(ctx, instr->operands[0]);
    if (!cmp)
       return false;
@@ -4214,7 +4212,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
               ctx.program->gfx_level >= GFX9) {
       combine_salu_lshl_add(ctx, instr);
    } else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
-      combine_salu_not_bitwise(ctx, instr);
+      if (!combine_salu_not_bitwise(ctx, instr))
+         combine_inverse_comparison(ctx, instr);
    } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 ||
               instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) {
       if (combine_ordering_test(ctx, instr)) {
@@ -4243,10 +4242,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
          }
       }
    }
-
-   /* do this after combine_salu_n2() */
-   if (instr->opcode == aco_opcode::s_andn2_b32 || instr->opcode == aco_opcode::s_andn2_b64)
-      combine_inverse_comparison(ctx, instr);
 }
 
 bool