diff --git a/.pick_status.json b/.pick_status.json index 069f82608bc..f09c079bd1f 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2254,7 +2254,7 @@ "description": "aco/insert_exec_mask: Don't immediately set exec to zero in break/continue blocks", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 734385c5e47..7b3d2b26e08 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -300,6 +300,21 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector> } else if (preds.size() == 1) { ctx.info[idx].exec = ctx.info[preds[0]].exec; + + /* After continue and break blocks, we implicitly set exec to zero. + * This is so that parallelcopies can be inserted before the branch + * without being affected by the changed exec mask. + */ + if (ctx.info[idx].exec.back().op.constantEquals(0)) { + assert(block->logical_succs.empty()); + /* Check whether the successor block already restores exec. */ + uint16_t block_kind = ctx.program->blocks[block->linear_succs[0]].kind; + if (!(block_kind & (block_kind_loop_header | block_kind_loop_exit | block_kind_invert | + block_kind_merge))) { + /* The successor does not restore exec. */ + restore_exec = true; + } + } } else { assert(preds.size() == 2); assert(ctx.info[preds[0]].exec.size() == ctx.info[preds[1]].exec.size()); @@ -703,14 +718,8 @@ add_branch_code(exec_ctx& ctx, Block* block) break; } - /* check if the successor is the merge block, otherwise set exec to 0 */ - // TODO: this could be done better by directly branching to the merge block - unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0]; - Block& succ = ctx.program->blocks[succ_idx]; - if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) { - bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes())); - } - + /* Implicitly set exec to zero and branch. */ + ctx.info[idx].exec.back().op = Operand::zero(bld.lm.bytes()); bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); } else if (block->kind & block_kind_continue) { @@ -729,14 +738,8 @@ add_branch_code(exec_ctx& ctx, Block* block) } assert(cond != Temp()); - /* check if the successor is the merge block, otherwise set exec to 0 */ - // TODO: this could be done better by directly branching to the merge block - unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0]; - Block& succ = ctx.program->blocks[succ_idx]; - if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) { - bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes())); - } - + /* Implicitly set exec to zero and branch. */ + ctx.info[idx].exec.back().op = Operand::zero(bld.lm.bytes()); bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); } else {