From b731be2e96c4dcde9c38dff44da74a396d198fe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?=
Date: Wed, 4 May 2022 13:58:39 +0200
Subject: [PATCH] aco: Remove branch instruction when exec is constant non-zero.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This mainly helps the "if (elect())" that is used in NGG culling shaders, effectively removing a useless branch from every culling shader.

Totals from 58346 (45.35% of 128653) affected shaders:
CodeSize: 153238668 -> 153005284 (-0.15%)
Instrs: 29066198 -> 29007852 (-0.20%)
Latency: 133626003 -> 133598182 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 20208765 -> 20208689 (-0.00%)
Branches: 1190209 -> 1131863 (-4.90%)

Signed-off-by: Timur Kristóf
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_ssa_elimination.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index c91519f0f8c..3f18b09f812 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -420,6 +420,14 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
       copy->operands[0] = Operand(exec, ctx.program->lane_mask);
       block.instructions.insert(it, std::move(copy));
    }
+
+   if (exec_val->opcode == aco_opcode::p_parallelcopy && exec_val->operands[0].isConstant() &&
+       exec_val->operands[0].constantValue()) {
+      /* Remove the branch instruction when exec is constant non-zero. */
+      aco_ptr<Instruction>& branch = block.instructions.back();
+      if (branch->isBranch() && branch->operands.size() && branch->operands[0].physReg() == exec)
+         block.instructions.back().reset();
+   }
 }
 
 void