aco/insert_exec_mask: Don't immediately set exec to zero in break/continue blocks

Instead, only indicate that exec should be zero and do
so in the successive helper block. This allows to insert
the parallelcopies from logical phis directly before the
branch in break and continue blocks.

Totals from 56 (0.07% of 79377) affected shaders: (Navi31)
Latency: 2472367 -> 2472422 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 253053 -> 253055 (+0.00%); split: -0.00%, +0.00%

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33527>
This commit is contained in:
Daniel Schürmann 2025-02-13 09:16:05 +01:00 committed by Marge Bot
parent 4975ac79ef
commit 7f7c1d463a

View file

@ -300,6 +300,21 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
} else if (preds.size() == 1) {
ctx.info[idx].exec = ctx.info[preds[0]].exec;
/* After continue and break blocks, we implicitly set exec to zero.
* This is so that parallelcopies can be inserted before the branch
* without being affected by the changed exec mask.
*/
if (ctx.info[idx].exec.back().op.constantEquals(0)) {
assert(block->logical_succs.empty());
/* Check whether the successor block already restores exec. */
uint16_t block_kind = ctx.program->blocks[block->linear_succs[0]].kind;
if (!(block_kind & (block_kind_loop_header | block_kind_loop_exit | block_kind_invert |
block_kind_merge))) {
/* The successor does not restore exec. */
restore_exec = true;
}
}
} else {
assert(preds.size() == 2);
assert(ctx.info[preds[0]].exec.size() == ctx.info[preds[1]].exec.size());
@ -703,14 +718,8 @@ add_branch_code(exec_ctx& ctx, Block* block)
break;
}
/* check if the successor is the merge block, otherwise set exec to 0 */
// TODO: this could be done better by directly branching to the merge block
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
Block& succ = ctx.program->blocks[succ_idx];
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
/* Implicitly set exec to zero and branch. */
ctx.info[idx].exec.back().op = Operand::zero(bld.lm.bytes());
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1],
block->linear_succs[0]);
} else if (block->kind & block_kind_continue) {
@ -729,14 +738,8 @@ add_branch_code(exec_ctx& ctx, Block* block)
}
assert(cond != Temp());
/* check if the successor is the merge block, otherwise set exec to 0 */
// TODO: this could be done better by directly branching to the merge block
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
Block& succ = ctx.program->blocks[succ_idx];
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
/* Implicitly set exec to zero and branch. */
ctx.info[idx].exec.back().op = Operand::zero(bld.lm.bytes());
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1],
block->linear_succs[0]);
} else {