aco: Better phi lowering for merge block when else-side is const.

Add a new special case for binary merge blocks to boolean
phi lowering. This special case benefits shaders that
have divergent branches with an empty else block,
for example all NGG culling shaders.

Fossil DB stats on Rembrandt (NGG culling enabled):

Totals from 61778 (45.79% of 134913) affected shaders:
SpillVGPRs: 2268 -> 2284 (+0.71%); split: -1.10%, +1.81%
CodeSize: 164317952 -> 162962772 (-0.82%); split: -0.83%, +0.00%
Instrs: 31249824 -> 30910686 (-1.09%); split: -1.09%, +0.00%
Latency: 154948555 -> 154781097 (-0.11%); split: -0.12%, +0.02%
InvThroughput: 30397664 -> 30370872 (-0.09%); split: -0.13%, +0.04%
VClause: 529239 -> 529229 (-0.00%); split: -0.00%, +0.00%
SClause: 783417 -> 783430 (+0.00%)
Copies: 2627570 -> 2595161 (-1.23%); split: -1.25%, +0.02%
Branches: 976506 -> 976508 (+0.00%); split: -0.00%, +0.00%

Fossil DB stats on GFX11 (NGG culling disabled):

Totals from 895 (0.66% of 134913) affected shaders:
SpillVGPRs: 2258 -> 2322 (+2.83%); split: -0.44%, +3.28%
CodeSize: 6229152 -> 6215880 (-0.21%); split: -0.37%, +0.16%
Scratch: 216576 -> 215808 (-0.35%); split: -0.47%, +0.12%
Instrs: 1202077 -> 1198396 (-0.31%); split: -0.43%, +0.13%
Latency: 15921336 -> 16000561 (+0.50%); split: -0.74%, +1.24%
InvThroughput: 7425765 -> 7474891 (+0.66%); split: -0.67%, +1.33%
VClause: 22976 -> 23008 (+0.14%); split: -0.03%, +0.17%
SClause: 38269 -> 38271 (+0.01%)
Copies: 123244 -> 123896 (+0.53%); split: -0.30%, +0.83%
Branches: 47570 -> 47574 (+0.01%); split: -0.00%, +0.01%

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21493>
This commit is contained in:
Timur Kristóf 2023-02-21 15:04:40 +01:00 committed by Marge Bot
parent 81b4806d64
commit 836204da25

View file

@ -192,6 +192,50 @@ build_merge_code(Program* program, ssa_state* state, Block* block, Operand cur)
return;
}
/* Lower a binary merge phi whose else-side operand (operands[1]) is a constant.
 *
 * Rather than going through the generic merge code, the instructions that
 * compute the merged value are emitted into the invert block. This makes it
 * possible to actually delete the else block when it is empty.
 *
 * program:      program the new instructions and temporaries belong to
 * invert_block: block to emit into; must have block_kind_invert set
 * phi:          the phi being lowered; it is rewritten in place, not removed
 */
void
build_const_else_merge_code(Program* program, Block& invert_block, aco_ptr<Instruction>& phi)
{
   assert(invert_block.kind & block_kind_invert);

   Operand then_op = phi->operands[0];
   const Operand else_op = phi->operands[1];

   /* The only constants handled here are -1 (all lanes true) and 0 (all lanes false). */
   assert(!then_op.isConstant() || then_op.constantEquals(0) || then_op.constantEquals(-1));
   assert(else_op.constantEquals(0) || else_op.constantEquals(-1));

   auto& instrs = invert_block.instructions;
   Builder bld(program);

   if (!then_op.isConstant()) {
      /* A non-constant then-side value has to be brought into the invert block
       * through a linear phi placed at the very start of that block.
       */
      bld.reset(&instrs, instrs.begin());
      then_op = bld.pseudo(aco_opcode::p_linear_phi, bld.def(bld.lm), then_op, Operand(bld.lm));
   }

   /* Look up the insertion point only now: the phi emitted above may have
    * changed the instruction list. New code goes right after the last phi.
    */
   auto first_non_phi =
      std::find_if_not(instrs.begin(), instrs.end(),
                       [](const aco_ptr<Instruction>& instr) -> bool { return is_phi(instr.get()); });
   bld.reset(&instrs, first_non_phi);

   const bool else_is_zero = else_op.constantEquals(0);

   Temp merged;
   if (else_is_zero && then_op.constantEquals(-1)) {
      /* then == -1 and else == 0: the result is simply a copy of exec. */
      merged = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
   } else {
      /* Combine the then-side value with exec: s_and when else == 0, s_orn2 otherwise. */
      Builder::WaveSpecificOpcode opc = Builder::s_orn2;
      if (else_is_zero)
         opc = Builder::s_and;
      merged = bld.sop2(opc, bld.def(bld.lm), bld.def(s1, scc), then_op, Operand(exec, bld.lm));
   }

   /* Deleting the original phi would invalidate the iterator in lower_phis,
    * so turn it into a trivial linear phi with both operands set to the result.
    */
   phi->opcode = aco_opcode::p_linear_phi;
   phi->operands[0] = Operand(merged);
   phi->operands[1] = Operand(merged);
}
void
init_any_pred_defined(Program* program, ssa_state* state, Block* block, aco_ptr<Instruction>& phi)
{
@ -268,6 +312,12 @@ lower_divergent_bool_phi(Program* program, ssa_state* state, Block* block,
return;
}
if (phi->operands.size() == 2 && phi->operands[1].isConstant() &&
(block->kind & block_kind_merge)) {
build_const_else_merge_code(program, program->blocks[block->linear_idom], phi);
return;
}
/* do this here to avoid resizing in case of no boolean phis */
state->visited.resize(program->blocks.size());
state->outputs.resize(program->blocks.size());