diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index af7dc3a3f68..8e4daaa8167 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1642,6 +1642,13 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) ctx.info[instr->definitions[0].tempId()].set_uniform_bool( ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp()); break; + } else if ((ctx.program->stage.num_sw_stages() > 1 || + ctx.program->stage.hw == HWStage::NGG) && + instr->pass_flags == 1) { + /* In case of merged shaders, pass_flags=1 means that all lanes are active (exec=-1), so + * s_and is unnecessary. */ + ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); + break; } else if (ctx.info[instr->operands[0].tempId()].is_vopc()) { Instruction* vopc_instr = ctx.info[instr->operands[0].tempId()].instr; /* Remove superfluous s_and when the VOPC instruction uses the same exec and thus