aco/lower_branches: allow for non-fallthrough loop exits in try_merge_break_with_continue()

Totals from 211 (0.27% of 79395) affected shaders: (Navi31)

Instrs: 276961 -> 276545 (-0.15%)
CodeSize: 1404356 -> 1402248 (-0.15%)
Latency: 1344722 -> 1344887 (+0.01%); split: -0.00%, +0.01%
InvThroughput: 165624 -> 165622 (-0.00%); split: -0.00%, +0.00%
Branches: 6149 -> 5987 (-2.63%)
SALU: 25722 -> 25468 (-0.99%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32477>
This commit is contained in:
Daniel Schürmann 2024-11-25 13:07:31 +01:00 committed by Marge Bot
parent 12656ea5f5
commit c677809f25

View file

@ -177,14 +177,6 @@ try_merge_break_with_continue(branch_ctx& ctx, Block& block)
if (merge.linear_succs.size() != 1)
return;
/* We want to use the loopexit as the fallthrough block from merge,
* so there shouldn't be a block in between.
*/
for (unsigned i = merge.index + 1; i < loopexit.index; i++) {
if (!ctx.program->blocks[i].instructions.empty())
return;
}
for (unsigned merge_pred : merge.linear_preds) {
if (merge_pred == block.index)
continue;
@ -214,7 +206,6 @@ try_merge_break_with_continue(branch_ctx& ctx, Block& block)
return;
/* Use conditional branch in merge block. */
block.instructions.pop_back();
merge.instructions.back()->opcode = aco_opcode::s_cbranch_scc1;
block.linear_succs.pop_back();
block.linear_succs[0] = merge.index;
@ -223,6 +214,18 @@ try_merge_break_with_continue(branch_ctx& ctx, Block& block)
std::replace(loopexit.linear_preds.begin(), loopexit.linear_preds.end(), block.index,
merge.index);
/* Check if we can use the loopexit as the fallthrough block.
* Otherwise, we'll need an extra branch instruction.
*/
for (unsigned i = merge.index + 1; i < loopexit.index; i++) {
if (!ctx.program->blocks[i].instructions.empty()) {
branch->opcode = aco_opcode::s_branch;
merge.instructions.emplace_back(std::move(block.instructions.back()));
break;
}
}
block.instructions.pop_back();
if (ctx.program->gfx_level >= GFX9) {
/* Combine s_andn2 and copy to exec to s_andn2_wrexec. */
Instruction* wr_exec =