From c677809f256292ac025dfa47ef85a57ee81d7fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 25 Nov 2024 13:07:31 +0100 Subject: [PATCH] aco/lower_branches: allow for non-fallthrough loop exits in try_merge_break_with_continue() Totals from 211 (0.27% of 79395) affected shaders: (Navi31) Instrs: 276961 -> 276545 (-0.15%) CodeSize: 1404356 -> 1402248 (-0.15%) Latency: 1344722 -> 1344887 (+0.01%); split: -0.00%, +0.01% InvThroughput: 165624 -> 165622 (-0.00%); split: -0.00%, +0.00% Branches: 6149 -> 5987 (-2.63%) SALU: 25722 -> 25468 (-0.99%) Part-of: --- src/amd/compiler/aco_lower_branches.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_lower_branches.cpp b/src/amd/compiler/aco_lower_branches.cpp index 4f8ebf0f79f..8bbbb39b83f 100644 --- a/src/amd/compiler/aco_lower_branches.cpp +++ b/src/amd/compiler/aco_lower_branches.cpp @@ -177,14 +177,6 @@ try_merge_break_with_continue(branch_ctx& ctx, Block& block) if (merge.linear_succs.size() != 1) return; - /* We want to use the loopexit as the fallthrough block from merge, - * so there shouldn't be a block inbetween. - */ - for (unsigned i = merge.index + 1; i < loopexit.index; i++) { - if (!ctx.program->blocks[i].instructions.empty()) - return; - } - for (unsigned merge_pred : merge.linear_preds) { if (merge_pred == block.index) continue; @@ -214,7 +206,6 @@ try_merge_break_with_continue(branch_ctx& ctx, Block& block) return; /* Use conditional branch in merge block. */ - block.instructions.pop_back(); merge.instructions.back()->opcode = aco_opcode::s_cbranch_scc1; block.linear_succs.pop_back(); block.linear_succs[0] = merge.index; @@ -223,6 +214,18 @@ try_merge_break_with_continue(branch_ctx& ctx, Block& block) std::replace(loopexit.linear_preds.begin(), loopexit.linear_preds.end(), block.index, merge.index); + /* Check if we can use the loopexit as the fallthrough block. + * Otherwise, we'll need an extra branch instruction. + */ + for (unsigned i = merge.index + 1; i < loopexit.index; i++) { + if (!ctx.program->blocks[i].instructions.empty()) { + branch->opcode = aco_opcode::s_branch; + merge.instructions.emplace_back(std::move(block.instructions.back())); + break; + } + } + block.instructions.pop_back(); + if (ctx.program->gfx_level >= GFX9) { /* Combine s_andn2 and copy to exec to s_andn2_wrexec. */ Instruction* wr_exec =