aco: completely skip branches if they're never taken

fossil-db (navi21):
Totals from 196 (0.25% of 79395) affected shaders:
Instrs: 101902 -> 101706 (-0.19%)
CodeSize: 576988 -> 576232 (-0.13%)
Latency: 750344 -> 750280 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 119170 -> 119161 (-0.01%)
Branches: 3933 -> 3737 (-4.98%)

fossil-db (vega10):
Totals from 585 (0.93% of 63053) affected shaders:
Instrs: 346877 -> 346292 (-0.17%)
CodeSize: 1810600 -> 1808260 (-0.13%)
Latency: 1817743 -> 1814233 (-0.19%)
InvThroughput: 652142 -> 651944 (-0.03%)
Branches: 5087 -> 4502 (-11.50%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30321>
This commit is contained in:
Rhys Perry 2024-07-23 16:52:46 +01:00 committed by Marge Bot
parent c29d9f1184
commit 9f1a5645cf
2 changed files with 10 additions and 7 deletions

View file

@@ -2824,6 +2824,11 @@ lower_to_hw_instr(Program* program)
branch->opcode == aco_opcode::p_cbranch_nz) &&
branch->operands[0].physReg() == exec);
if (branch->never_taken) {
assert(!uniform_branch);
continue;
}
/* Check if the branch instruction can be removed.
* This is beneficial when executing the next block with an empty exec mask
* is faster than the branch instruction itself.
@@ -2880,18 +2885,14 @@ lower_to_hw_instr(Program* program)
num_scalar++;
}
}
} else if (inst->isEXP()) {
/* Export instructions with exec=0 can hang some GFX10+ (unclear on old GPUs). */
} else if (inst->isEXP() || inst->isSMEM() || inst->isBarrier()) {
/* Export instructions with exec=0 can hang some GFX10+ (unclear on old GPUs),
* SMEM might be an invalid access, and barriers are probably expensive. */
can_remove = false;
} else if (inst->isVMEM() || inst->isFlatLike() || inst->isDS() ||
inst->isLDSDIR()) {
// TODO: GFX6-9 can use vskip
can_remove = prefer_remove;
} else if (inst->isSMEM()) {
/* SMEM are at least as expensive as branches */
can_remove = prefer_remove && branch->never_taken;
} else if (inst->isBarrier()) {
can_remove = prefer_remove && branch->never_taken;
} else {
can_remove = false;
assert(false && "Pseudo instructions should be lowered by this point.");

View file

@@ -215,6 +215,7 @@ try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
assert(branch.target[1] == block->index);
branch.target[1] = succ.index;
branch.opcode = aco_opcode::p_branch;
branch.rarely_taken = branch.never_taken = false;
} else if (branch.target[1] == block->index) {
/* check if there is a fall-through path from block to succ */
bool falls_through = block->index < succ.index;
@@ -256,6 +257,7 @@ try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
branch.operands.pop_back();
branch.opcode = aco_opcode::p_branch;
branch.rarely_taken = branch.never_taken = false;
}
for (unsigned i = 0; i < pred.linear_succs.size(); i++)