aco/insert_exec_mask: Don't immediately set exec to zero in break/continue blocks

Instead, only record that exec should be zero and actually
zero it in the subsequent helper block. This allows inserting
the parallelcopies from logical phis directly before the
branch in break and continue blocks.

Totals from 56 (0.07% of 79377) affected shaders: (Navi31)
Latency: 2472367 -> 2472422 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 253053 -> 253055 (+0.00%); split: -0.00%, +0.00%

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33527>
(cherry picked from commit 7f7c1d463a)
This commit is contained in:
Daniel Schürmann 2025-02-13 09:16:05 +01:00 committed by Eric Engestrom
parent 33a7ae1f0a
commit 1bb39be75e
2 changed files with 20 additions and 17 deletions

View file

@ -2254,7 +2254,7 @@
"description": "aco/insert_exec_mask: Don't immediately set exec to zero in break/continue blocks",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -300,6 +300,21 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
} else if (preds.size() == 1) {
ctx.info[idx].exec = ctx.info[preds[0]].exec;
/* After continue and break blocks, we implicitly set exec to zero.
* This is so that parallelcopies can be inserted before the branch
* without being affected by the changed exec mask.
*/
if (ctx.info[idx].exec.back().op.constantEquals(0)) {
assert(block->logical_succs.empty());
/* Check whether the successor block already restores exec. */
uint16_t block_kind = ctx.program->blocks[block->linear_succs[0]].kind;
if (!(block_kind & (block_kind_loop_header | block_kind_loop_exit | block_kind_invert |
block_kind_merge))) {
/* The successor does not restore exec. */
restore_exec = true;
}
}
} else {
assert(preds.size() == 2);
assert(ctx.info[preds[0]].exec.size() == ctx.info[preds[1]].exec.size());
@ -703,14 +718,8 @@ add_branch_code(exec_ctx& ctx, Block* block)
break;
}
/* check if the successor is the merge block, otherwise set exec to 0 */
// TODO: this could be done better by directly branching to the merge block
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
Block& succ = ctx.program->blocks[succ_idx];
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
/* Implicitly set exec to zero and branch. */
ctx.info[idx].exec.back().op = Operand::zero(bld.lm.bytes());
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1],
block->linear_succs[0]);
} else if (block->kind & block_kind_continue) {
@ -729,14 +738,8 @@ add_branch_code(exec_ctx& ctx, Block* block)
}
assert(cond != Temp());
/* check if the successor is the merge block, otherwise set exec to 0 */
// TODO: this could be done better by directly branching to the merge block
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
Block& succ = ctx.program->blocks[succ_idx];
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
/* Implicitly set exec to zero and branch. */
ctx.info[idx].exec.back().op = Operand::zero(bld.lm.bytes());
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1],
block->linear_succs[0]);
} else {