mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 18:00:13 +01:00
aco: optimize boolean phis with uniform selections
Even though the boolean can be divergent, the control flow can be (at
least partially) uniform. For example, we don't have to create any
s_andn2_b64/s_and_b64/s_or_b64 instructions with this code:
   a = ...
   loop {
      b = bool_phi a, c
      if (uniform)
         break
      c = ...
   }
   d = phi c
fossil-db (Navi):
Totals from 5506 (4.05% of 135946) affected shaders:
SGPRs: 605720 -> 604024 (-0.28%)
SpillSGPRs: 52025 -> 51733 (-0.56%)
CodeSize: 65221188 -> 64957808 (-0.40%); split: -0.41%, +0.00%
Instrs: 12637881 -> 12584610 (-0.42%); split: -0.42%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3388>
This commit is contained in:
parent
f622e80494
commit
9a089baff1
1 changed file with 27 additions and 1 deletion
|
|
@@ -35,6 +35,9 @@
|
||||||
namespace aco {
|
namespace aco {
|
||||||
|
|
||||||
struct ssa_state {
|
struct ssa_state {
|
||||||
|
bool checked_preds_for_uniform;
|
||||||
|
bool all_preds_uniform;
|
||||||
|
|
||||||
bool needs_init;
|
bool needs_init;
|
||||||
uint64_t cur_undef_operands;
|
uint64_t cur_undef_operands;
|
||||||
|
|
||||||
|
|
@@ -152,6 +155,19 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
|
||||||
{
|
{
|
||||||
Builder bld(program);
|
Builder bld(program);
|
||||||
|
|
||||||
|
if (!state->checked_preds_for_uniform) {
|
||||||
|
state->all_preds_uniform = !(block->kind & block_kind_merge);
|
||||||
|
for (unsigned pred : block->logical_preds)
|
||||||
|
state->all_preds_uniform = state->all_preds_uniform && (program->blocks[pred].kind & block_kind_uniform);
|
||||||
|
state->checked_preds_for_uniform = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state->all_preds_uniform) {
|
||||||
|
assert(block->logical_preds.size() == block->linear_preds.size());
|
||||||
|
phi->opcode = aco_opcode::p_linear_phi;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
state->latest.resize(program->blocks.size());
|
state->latest.resize(program->blocks.size());
|
||||||
|
|
||||||
uint64_t undef_operands = 0;
|
uint64_t undef_operands = 0;
|
||||||
|
|
@@ -180,14 +196,23 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
|
||||||
state->writes[block->logical_preds[i]] = program->allocateId();
|
state->writes[block->logical_preds[i]] = program->allocateId();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool uniform_merge = block->kind & block_kind_loop_header;
|
||||||
|
|
||||||
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
||||||
Block *pred = &program->blocks[block->logical_preds[i]];
|
Block *pred = &program->blocks[block->logical_preds[i]];
|
||||||
|
|
||||||
|
bool need_get_ssa = !uniform_merge;
|
||||||
|
if (block->kind & block_kind_loop_header && !(pred->kind & block_kind_uniform))
|
||||||
|
uniform_merge = false;
|
||||||
|
|
||||||
if (phi->operands[i].isUndefined())
|
if (phi->operands[i].isUndefined())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Operand cur = get_ssa(program, pred->index, state, true);
|
Operand cur(bld.lm);
|
||||||
|
if (need_get_ssa)
|
||||||
|
cur = get_ssa(program, pred->index, state, true);
|
||||||
assert(cur.regClass() == bld.lm);
|
assert(cur.regClass() == bld.lm);
|
||||||
|
|
||||||
Temp new_cur = {state->writes.at(pred->index), program->lane_mask};
|
Temp new_cur = {state->writes.at(pred->index), program->lane_mask};
|
||||||
assert(new_cur.regClass() == bld.lm);
|
assert(new_cur.regClass() == bld.lm);
|
||||||
|
|
||||||
|
|
@@ -241,6 +266,7 @@ void lower_phis(Program* program)
|
||||||
ssa_state state;
|
ssa_state state;
|
||||||
|
|
||||||
for (Block& block : program->blocks) {
|
for (Block& block : program->blocks) {
|
||||||
|
state.checked_preds_for_uniform = false;
|
||||||
state.needs_init = true;
|
state.needs_init = true;
|
||||||
for (aco_ptr<Instruction>& phi : block.instructions) {
|
for (aco_ptr<Instruction>& phi : block.instructions) {
|
||||||
if (phi->opcode == aco_opcode::p_phi) {
|
if (phi->opcode == aco_opcode::p_phi) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue