2019-09-17 13:22:17 +02:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2019 Valve Corporation
|
|
|
|
|
*
|
2024-04-08 09:02:30 +02:00
|
|
|
* SPDX-License-Identifier: MIT
|
2019-09-17 13:22:17 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "aco_builder.h"
|
2021-06-09 15:40:03 +02:00
|
|
|
#include "aco_ir.h"
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2023-03-31 21:59:33 +02:00
|
|
|
#include "util/enum_operators.h"
|
|
|
|
|
|
2021-06-09 15:40:03 +02:00
|
|
|
#include <algorithm>
|
|
|
|
|
#include <map>
|
|
|
|
|
#include <vector>
|
2019-09-17 13:22:17 +02:00
|
|
|
|
|
|
|
|
namespace aco {
|
|
|
|
|
|
2024-06-15 16:17:29 +02:00
|
|
|
namespace {
|
|
|
|
|
|
2021-06-30 14:17:28 +02:00
|
|
|
enum class pred_defined : uint8_t {
|
|
|
|
|
undef = 0,
|
|
|
|
|
const_1 = 1,
|
|
|
|
|
const_0 = 2,
|
|
|
|
|
temp = 3,
|
2021-06-30 14:21:44 +02:00
|
|
|
zero = 4, /* all disabled lanes are zero'd out */
|
2021-06-30 14:17:28 +02:00
|
|
|
};
|
|
|
|
|
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(pred_defined);
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
struct ssa_state {
|
2020-01-06 15:46:28 +00:00
|
|
|
unsigned loop_nest_depth;
|
2024-04-09 14:39:34 +02:00
|
|
|
RegClass rc;
|
2021-06-24 22:11:20 +02:00
|
|
|
|
2021-06-30 14:17:28 +02:00
|
|
|
std::vector<pred_defined> any_pred_defined;
|
2020-07-10 16:31:31 +01:00
|
|
|
std::vector<bool> visited;
|
2021-06-24 22:11:20 +02:00
|
|
|
std::vector<Operand> outputs; /* the output per block */
|
2019-09-17 13:22:17 +02:00
|
|
|
};
|
|
|
|
|
|
2023-06-08 18:56:43 +02:00
|
|
|
Operand get_output(Program* program, unsigned block_idx, ssa_state* state);
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
init_outputs(Program* program, ssa_state* state, unsigned start, unsigned end)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
2024-04-09 14:39:34 +02:00
|
|
|
for (unsigned i = start; i <= end; ++i) {
|
2023-06-08 18:56:43 +02:00
|
|
|
if (state->visited[i])
|
|
|
|
|
continue;
|
|
|
|
|
state->outputs[i] = get_output(program, i, state);
|
|
|
|
|
state->visited[i] = true;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
2023-06-08 18:56:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Determines the operand that represents the lowered value for block
 * block_idx, based on the outputs of its linear predecessors. When the
 * predecessors disagree (or the block is a loop header), a p_linear_phi
 * is inserted at the beginning of the block and its result is returned.
 */
Operand
get_output(Program* program, unsigned block_idx, ssa_state* state)
{
   Block& blk = program->blocks[block_idx];

   /* Nothing defined reaches this block. */
   if (state->any_pred_defined[block_idx] == pred_defined::undef)
      return Operand(state->rc);

   /* loop-carried value for loop exit phis */
   if (blk.loop_nest_depth < state->loop_nest_depth)
      return Operand::zero(state->rc.bytes());

   const size_t pred_count = blk.linear_preds.size();

   /* In these cases the value of the first linear predecessor is simply forwarded. */
   const bool forwards_first_pred = blk.loop_nest_depth > state->loop_nest_depth ||
                                    pred_count == 1 || (blk.kind & block_kind_loop_exit);
   if (forwards_first_pred)
      return state->outputs[blk.linear_preds[0]];

   Operand result;

   if (blk.kind & block_kind_loop_header) {
      /* Loop headers can contain back edges, in which case the predecessor
       * outputs aren't yet determined because the predecessor is after the block.
       * The predecessor outputs also depend on the output of the loop header,
       * so allocate a temporary that will store this block's output and use that
       * to calculate the predecessor block output. In this case, we always emit a phi
       * to ensure the allocated temporary is defined. */
      unsigned first_body_idx = block_idx + 1;
      unsigned last_pred_idx = blk.linear_preds.back();

      state->outputs[block_idx] = Operand(Temp(program->allocateTmp(state->rc)));
      init_outputs(program, state, first_body_idx, last_pred_idx);
      result = state->outputs[block_idx];
   } else {
      /* If every predecessor provides the same operand, no phi is needed. */
      const bool preds_agree =
         std::all_of(blk.linear_preds.begin() + 1, blk.linear_preds.end(),
                     [&](unsigned pred_idx) {
                        return state->outputs[pred_idx] == state->outputs[blk.linear_preds[0]];
                     });
      if (preds_agree)
         return state->outputs[blk.linear_preds[0]];

      result = Operand(Temp(program->allocateTmp(state->rc)));
   }

   /* create phi */
   aco_ptr<Instruction> linear_phi{
      create_instruction(aco_opcode::p_linear_phi, Format::PSEUDO, pred_count, 1)};
   for (unsigned op_idx = 0; op_idx < pred_count; op_idx++)
      linear_phi->operands[op_idx] = state->outputs[blk.linear_preds[op_idx]];
   linear_phi->definitions[0] = Definition(result.getTemp());
   blk.instructions.emplace(blk.instructions.begin(), std::move(linear_phi));

   assert(result.size() == state->rc.size());

   return result;
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
insert_before_logical_end(Block* block, aco_ptr<Instruction> instr)
|
|
|
|
|
{
|
2020-11-03 14:40:05 +01:00
|
|
|
auto IsLogicalEnd = [](const aco_ptr<Instruction>& inst) -> bool
|
|
|
|
|
{ return inst->opcode == aco_opcode::p_logical_end; };
|
2019-08-12 20:40:37 +02:00
|
|
|
auto it = std::find_if(block->instructions.crbegin(), block->instructions.crend(), IsLogicalEnd);
|
|
|
|
|
|
|
|
|
|
if (it == block->instructions.crend()) {
|
2021-01-20 15:27:16 +00:00
|
|
|
assert(block->instructions.back()->isBranch());
|
2019-08-12 20:40:37 +02:00
|
|
|
block->instructions.insert(std::prev(block->instructions.end()), std::move(instr));
|
2020-01-06 16:50:41 +00:00
|
|
|
} else {
|
2019-08-12 20:40:37 +02:00
|
|
|
block->instructions.insert(std::prev(it.base()), std::move(instr));
|
2020-01-06 16:50:41 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
2021-06-30 14:17:28 +02:00
|
|
|
build_merge_code(Program* program, ssa_state* state, Block* block, Operand cur)
|
2020-01-06 16:50:41 +00:00
|
|
|
{
|
2021-06-30 14:17:28 +02:00
|
|
|
unsigned block_idx = block->index;
|
|
|
|
|
Definition dst = Definition(state->outputs[block_idx].getTemp());
|
2023-06-08 18:56:43 +02:00
|
|
|
Operand prev = get_output(program, block_idx, state);
|
2021-06-30 14:17:28 +02:00
|
|
|
if (cur.isUndefined())
|
2024-04-09 14:39:34 +02:00
|
|
|
return;
|
2020-01-06 16:50:41 +00:00
|
|
|
|
2021-06-30 14:17:28 +02:00
|
|
|
Builder bld(program);
|
2020-01-06 16:50:41 +00:00
|
|
|
auto IsLogicalEnd = [](const aco_ptr<Instruction>& instr) -> bool
|
|
|
|
|
{ return instr->opcode == aco_opcode::p_logical_end; };
|
|
|
|
|
auto it = std::find_if(block->instructions.rbegin(), block->instructions.rend(), IsLogicalEnd);
|
|
|
|
|
assert(it != block->instructions.rend());
|
|
|
|
|
bld.reset(&block->instructions, std::prev(it.base()));
|
|
|
|
|
|
2021-06-30 14:17:28 +02:00
|
|
|
pred_defined defined = state->any_pred_defined[block_idx];
|
|
|
|
|
if (defined == pred_defined::undef) {
|
|
|
|
|
return;
|
|
|
|
|
} else if (defined == pred_defined::const_0) {
|
|
|
|
|
bld.sop2(Builder::s_and, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm));
|
|
|
|
|
return;
|
|
|
|
|
} else if (defined == pred_defined::const_1) {
|
|
|
|
|
bld.sop2(Builder::s_orn2, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm));
|
2020-01-06 16:50:41 +00:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-30 14:17:28 +02:00
|
|
|
assert(prev.isTemp());
|
2021-06-30 14:21:44 +02:00
|
|
|
/* simpler sequence in case prev has only zeros in disabled lanes */
|
|
|
|
|
if ((defined & pred_defined::zero) == pred_defined::zero) {
|
|
|
|
|
if (cur.isConstant()) {
|
|
|
|
|
if (!cur.constantValue()) {
|
|
|
|
|
bld.copy(dst, prev);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
cur = Operand(exec, bld.lm);
|
|
|
|
|
} else {
|
|
|
|
|
cur =
|
|
|
|
|
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), cur, Operand(exec, bld.lm));
|
|
|
|
|
}
|
|
|
|
|
bld.sop2(Builder::s_or, dst, bld.def(s1, scc), prev, cur);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-30 14:17:28 +02:00
|
|
|
if (cur.isConstant()) {
|
|
|
|
|
if (cur.constantValue())
|
2020-01-06 16:50:41 +00:00
|
|
|
bld.sop2(Builder::s_or, dst, bld.def(s1, scc), prev, Operand(exec, bld.lm));
|
|
|
|
|
else
|
2021-06-30 14:17:28 +02:00
|
|
|
bld.sop2(Builder::s_andn2, dst, bld.def(s1, scc), prev, Operand(exec, bld.lm));
|
|
|
|
|
return;
|
2020-01-06 16:50:41 +00:00
|
|
|
}
|
2021-06-30 14:17:28 +02:00
|
|
|
prev =
|
|
|
|
|
bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), prev, Operand(exec, bld.lm));
|
|
|
|
|
cur = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), cur, Operand(exec, bld.lm));
|
|
|
|
|
bld.sop2(Builder::s_or, dst, bld.def(s1, scc), prev, cur);
|
|
|
|
|
return;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
aco: Better phi lowering for merge block when else-side is const.
Add a new special case for binary merge blocks to boolean
phi lowering. This special case benefits shaders that
have divergent branches with an empty else block,
for example all NGG culling shaders.
Fossil DB stats on Rembrandt (NGG culling enabled):
Totals from 61778 (45.79% of 134913) affected shaders:
SpillVGPRs: 2268 -> 2284 (+0.71%); split: -1.10%, +1.81%
CodeSize: 164317952 -> 162962772 (-0.82%); split: -0.83%, +0.00%
Instrs: 31249824 -> 30910686 (-1.09%); split: -1.09%, +0.00%
Latency: 154948555 -> 154781097 (-0.11%); split: -0.12%, +0.02%
InvThroughput: 30397664 -> 30370872 (-0.09%); split: -0.13%, +0.04%
VClause: 529239 -> 529229 (-0.00%); split: -0.00%, +0.00%
SClause: 783417 -> 783430 (+0.00%)
Copies: 2627570 -> 2595161 (-1.23%); split: -1.25%, +0.02%
Branches: 976506 -> 976508 (+0.00%); split: -0.00%, +0.00%
Fossil DB stats on GFX11 (NGG culling disabled):
Totals from 895 (0.66% of 134913) affected shaders:
SpillVGPRs: 2258 -> 2322 (+2.83%); split: -0.44%, +3.28%
CodeSize: 6229152 -> 6215880 (-0.21%); split: -0.37%, +0.16%
Scratch: 216576 -> 215808 (-0.35%); split: -0.47%, +0.12%
Instrs: 1202077 -> 1198396 (-0.31%); split: -0.43%, +0.13%
Latency: 15921336 -> 16000561 (+0.50%); split: -0.74%, +1.24%
InvThroughput: 7425765 -> 7474891 (+0.66%); split: -0.67%, +1.33%
VClause: 22976 -> 23008 (+0.14%); split: -0.03%, +0.17%
SClause: 38269 -> 38271 (+0.01%)
Copies: 123244 -> 123896 (+0.53%); split: -0.30%, +0.83%
Branches: 47570 -> 47574 (+0.01%); split: -0.00%, +0.01%
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21493>
2023-02-21 15:04:40 +01:00
|
|
|
void
|
|
|
|
|
build_const_else_merge_code(Program* program, Block& invert_block, aco_ptr<Instruction>& phi)
|
|
|
|
|
{
|
|
|
|
|
/* When the else-side operand of a binary merge phi is constant,
|
|
|
|
|
* we can use a simpler way to lower the phi by emitting some
|
|
|
|
|
* instructions to the invert block instead.
|
|
|
|
|
* This allows us to actually delete the else block when it's empty.
|
|
|
|
|
*/
|
|
|
|
|
assert(invert_block.kind & block_kind_invert);
|
|
|
|
|
Builder bld(program);
|
|
|
|
|
Operand then = phi->operands[0];
|
|
|
|
|
const Operand els = phi->operands[1];
|
|
|
|
|
|
|
|
|
|
/* Only -1 (all lanes true) and 0 (all lanes false) constants are supported here. */
|
|
|
|
|
assert(!then.isConstant() || then.constantEquals(0) || then.constantEquals(-1));
|
|
|
|
|
assert(els.constantEquals(0) || els.constantEquals(-1));
|
|
|
|
|
|
|
|
|
|
if (!then.isConstant()) {
|
|
|
|
|
/* Left-hand operand is not constant, so we need to emit a phi to access it. */
|
|
|
|
|
bld.reset(&invert_block.instructions, invert_block.instructions.begin());
|
|
|
|
|
then = bld.pseudo(aco_opcode::p_linear_phi, bld.def(bld.lm), then, Operand(bld.lm));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto after_phis =
|
|
|
|
|
std::find_if(invert_block.instructions.begin(), invert_block.instructions.end(),
|
|
|
|
|
[](const aco_ptr<Instruction>& instr) -> bool { return !is_phi(instr.get()); });
|
|
|
|
|
bld.reset(&invert_block.instructions, after_phis);
|
|
|
|
|
|
|
|
|
|
Temp tmp;
|
|
|
|
|
if (then.constantEquals(-1) && els.constantEquals(0)) {
|
|
|
|
|
tmp = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
|
|
|
|
|
} else {
|
|
|
|
|
Builder::WaveSpecificOpcode opc = els.constantEquals(0) ? Builder::s_and : Builder::s_orn2;
|
|
|
|
|
tmp = bld.sop2(opc, bld.def(bld.lm), bld.def(s1, scc), then, Operand(exec, bld.lm));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* We can't delete the original phi because that'd invalidate the iterator in lower_phis,
|
|
|
|
|
* so just make it a trivial phi instead.
|
|
|
|
|
*/
|
|
|
|
|
phi->opcode = aco_opcode::p_linear_phi;
|
|
|
|
|
phi->operands[0] = Operand(tmp);
|
|
|
|
|
phi->operands[1] = Operand(tmp);
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-09 20:10:51 +01:00
|
|
|
void
|
2023-06-08 18:56:43 +02:00
|
|
|
init_state(Program* program, Block* block, ssa_state* state, aco_ptr<Instruction>& phi)
|
2021-06-09 20:10:51 +01:00
|
|
|
{
|
2023-06-08 18:56:43 +02:00
|
|
|
Builder bld(program);
|
|
|
|
|
|
|
|
|
|
/* do this here to avoid resizing in case of no boolean phis */
|
2024-04-09 14:39:34 +02:00
|
|
|
state->rc = phi->definitions[0].regClass();
|
2023-06-08 18:56:43 +02:00
|
|
|
state->visited.resize(program->blocks.size());
|
|
|
|
|
state->outputs.resize(program->blocks.size());
|
|
|
|
|
state->any_pred_defined.resize(program->blocks.size());
|
|
|
|
|
state->loop_nest_depth = block->loop_nest_depth;
|
|
|
|
|
if (block->kind & block_kind_loop_exit)
|
|
|
|
|
state->loop_nest_depth += 1;
|
|
|
|
|
std::fill(state->visited.begin(), state->visited.end(), false);
|
2021-06-30 14:17:28 +02:00
|
|
|
std::fill(state->any_pred_defined.begin(), state->any_pred_defined.end(), pred_defined::undef);
|
2023-06-08 18:56:43 +02:00
|
|
|
|
2021-06-09 20:10:51 +01:00
|
|
|
for (unsigned i = 0; i < block->logical_preds.size(); i++) {
|
|
|
|
|
if (phi->operands[i].isUndefined())
|
|
|
|
|
continue;
|
2021-06-30 14:17:28 +02:00
|
|
|
pred_defined defined = pred_defined::temp;
|
2024-04-09 14:39:34 +02:00
|
|
|
if (phi->operands[i].isConstant() && phi->opcode == aco_opcode::p_boolean_phi)
|
2021-06-30 14:17:28 +02:00
|
|
|
defined = phi->operands[i].constantValue() ? pred_defined::const_1 : pred_defined::const_0;
|
2021-06-09 20:10:51 +01:00
|
|
|
for (unsigned succ : program->blocks[block->logical_preds[i]].linear_succs)
|
2021-06-30 14:17:28 +02:00
|
|
|
state->any_pred_defined[succ] |= defined;
|
2021-06-09 20:10:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned start = block->logical_preds[0];
|
2024-04-09 14:39:34 +02:00
|
|
|
unsigned end = block->linear_preds.back();
|
2021-06-09 20:10:51 +01:00
|
|
|
|
2024-05-20 17:57:10 +01:00
|
|
|
/* The value might not be loop-invariant if the loop has a divergent break and
|
|
|
|
|
* - this is a boolean phi, which must be combined with logical exits from previous iterations
|
|
|
|
|
* - or the loop also has an additional linear exit (continue_or_break), which might be taken in
|
|
|
|
|
* a different iteration than the logical exit
|
|
|
|
|
*/
|
2024-05-02 19:32:48 +01:00
|
|
|
bool continue_or_break = block->linear_preds.size() > block->logical_preds.size();
|
|
|
|
|
bool has_divergent_break = std::any_of(
|
|
|
|
|
block->logical_preds.begin(), block->logical_preds.end(),
|
|
|
|
|
[&](unsigned pred) { return !(program->blocks[pred].kind & block_kind_uniform); });
|
2024-05-20 17:57:10 +01:00
|
|
|
if (block->kind & block_kind_loop_exit && has_divergent_break &&
|
|
|
|
|
(phi->opcode == aco_opcode::p_boolean_phi || continue_or_break)) {
|
|
|
|
|
/* Start at the loop pre-header as we need the value from previous iterations. */
|
2023-06-08 18:56:43 +02:00
|
|
|
while (program->blocks[start].loop_nest_depth >= state->loop_nest_depth)
|
2021-06-24 18:11:12 +02:00
|
|
|
start--;
|
2024-04-09 14:39:34 +02:00
|
|
|
end = block->index - 1;
|
2021-06-24 18:11:12 +02:00
|
|
|
/* If the loop-header has a back-edge, we need to insert a phi.
|
|
|
|
|
* This will contain a defined value */
|
2023-06-08 18:56:43 +02:00
|
|
|
if (program->blocks[start + 1].linear_preds.size() > 1) {
|
2024-05-02 19:32:48 +01:00
|
|
|
if (phi->opcode == aco_opcode::p_boolean_phi) {
|
|
|
|
|
state->any_pred_defined[start + 1] = pred_defined::temp | pred_defined::zero;
|
|
|
|
|
/* add dominating zero: this allows to emit simpler merge sequences
|
|
|
|
|
* if we can ensure that all disabled lanes are always zero on incoming values
|
|
|
|
|
*/
|
|
|
|
|
state->any_pred_defined[start] = pred_defined::const_0;
|
|
|
|
|
} else {
|
|
|
|
|
state->any_pred_defined[start + 1] = pred_defined::temp;
|
|
|
|
|
}
|
2021-06-30 14:21:44 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-09 14:39:34 +02:00
|
|
|
/* For loop header phis, don't propagate the incoming value */
|
|
|
|
|
if (block->kind & block_kind_loop_header) {
|
|
|
|
|
state->any_pred_defined[block->index] = pred_defined::undef;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (unsigned j = start; j <= end; j++) {
|
2021-06-30 14:17:28 +02:00
|
|
|
if (state->any_pred_defined[j] == pred_defined::undef)
|
2021-06-24 18:11:12 +02:00
|
|
|
continue;
|
|
|
|
|
for (unsigned succ : program->blocks[j].linear_succs)
|
2021-06-30 14:17:28 +02:00
|
|
|
state->any_pred_defined[succ] |= state->any_pred_defined[j];
|
2021-06-24 18:11:12 +02:00
|
|
|
}
|
|
|
|
|
|
2021-06-30 14:17:28 +02:00
|
|
|
state->any_pred_defined[block->index] = pred_defined::undef;
|
2023-06-08 18:56:43 +02:00
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
2024-04-09 14:39:34 +02:00
|
|
|
/* If the Operand is undefined, just propagate the previous value. */
|
|
|
|
|
if (phi->operands[i].isUndefined())
|
|
|
|
|
continue;
|
|
|
|
|
|
2023-06-08 18:56:43 +02:00
|
|
|
unsigned pred = block->logical_preds[i];
|
2024-04-09 14:39:34 +02:00
|
|
|
if (phi->opcode == aco_opcode::p_boolean_phi &&
|
|
|
|
|
state->any_pred_defined[pred] != pred_defined::undef) {
|
|
|
|
|
/* Needs merge code sequence. */
|
|
|
|
|
state->outputs[pred] = Operand(bld.tmp(state->rc));
|
|
|
|
|
} else {
|
2023-06-08 18:56:43 +02:00
|
|
|
state->outputs[pred] = phi->operands[i];
|
2024-04-09 14:39:34 +02:00
|
|
|
}
|
|
|
|
|
assert(state->outputs[pred].size() == state->rc.size());
|
2023-06-08 18:56:43 +02:00
|
|
|
state->visited[pred] = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
init_outputs(program, state, start, end);
|
2021-06-09 20:10:51 +01:00
|
|
|
}
|
|
|
|
|
|
2020-01-06 15:46:28 +00:00
|
|
|
void
|
2024-04-09 17:56:29 +02:00
|
|
|
lower_phi_to_linear(Program* program, ssa_state* state, Block* block, aco_ptr<Instruction>& phi)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
aco: ensure phis uniformized by divergence analysis are SGPR
Otherwise, they might not actually be uniform when divergence analysis
claimed they are.
fossil-db (navi31):
Totals from 5118 (6.45% of 79395) affected shaders:
MaxWaves: 159520 -> 159560 (+0.03%); split: +0.03%, -0.01%
Instrs: 2138456 -> 2165351 (+1.26%); split: -0.02%, +1.28%
CodeSize: 11152460 -> 11260340 (+0.97%); split: -0.02%, +0.98%
VGPRs: 225144 -> 218124 (-3.12%); split: -3.25%, +0.13%
Latency: 11116102 -> 11059208 (-0.51%); split: -0.69%, +0.18%
InvThroughput: 1230193 -> 1252148 (+1.78%); split: -0.01%, +1.80%
VClause: 39518 -> 39513 (-0.01%); split: -0.49%, +0.48%
SClause: 59378 -> 59434 (+0.09%); split: -0.02%, +0.11%
Copies: 156172 -> 165997 (+6.29%); split: -0.81%, +7.10%
PreSGPRs: 181094 -> 181203 (+0.06%)
PreVGPRs: 139731 -> 139393 (-0.24%)
VALU: 1220769 -> 1244301 (+1.93%); split: -0.02%, +1.95%
SALU: 199567 -> 200240 (+0.34%); split: -0.00%, +0.34%
fossil-db (navi21):
Totals from 35520 (44.74% of 79395) affected shaders:
MaxWaves: 951830 -> 951870 (+0.00%)
Instrs: 20227773 -> 20229388 (+0.01%); split: -0.00%, +0.01%
CodeSize: 105513724 -> 105379916 (-0.13%); split: -0.13%, +0.01%
VGPRs: 1375400 -> 1375232 (-0.01%)
Latency: 81013985 -> 81046435 (+0.04%); split: -0.00%, +0.04%
InvThroughput: 15273291 -> 15269166 (-0.03%); split: -0.04%, +0.01%
VClause: 354310 -> 354314 (+0.00%); split: -0.00%, +0.00%
SClause: 417047 -> 417049 (+0.00%); split: -0.00%, +0.00%
Copies: 1699486 -> 1699445 (-0.00%); split: -0.01%, +0.01%
Branches: 591269 -> 591274 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 1370567 -> 1371062 (+0.04%)
PreVGPRs: 1100953 -> 1100716 (-0.02%)
VALU: 11075164 -> 11076189 (+0.01%); split: -0.00%, +0.01%
SALU: 3647378 -> 3648002 (+0.02%); split: -0.00%, +0.02%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30211>
2024-10-08 17:35:44 +01:00
|
|
|
if (phi->opcode == aco_opcode::p_phi) {
|
|
|
|
|
/* Insert p_as_uniform for VGPR->SGPR phis. */
|
|
|
|
|
Builder bld(program);
|
|
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
|
|
|
|
if (phi->operands[i].isOfType(RegType::vgpr)) {
|
|
|
|
|
Block* pred = &program->blocks[block->logical_preds[i]];
|
|
|
|
|
Temp new_phi_src = bld.tmp(phi->definitions[0].regClass());
|
|
|
|
|
insert_before_logical_end(
|
|
|
|
|
pred, bld.pseudo(aco_opcode::p_as_uniform, Definition(new_phi_src), phi->operands[i])
|
|
|
|
|
.get_ptr());
|
|
|
|
|
phi->operands[i].setTemp(new_phi_src);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-06 10:59:09 +02:00
|
|
|
if (block->linear_preds == block->logical_preds) {
|
2020-01-09 16:51:34 +00:00
|
|
|
phi->opcode = aco_opcode::p_linear_phi;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-09 17:56:29 +02:00
|
|
|
if ((block->kind & block_kind_merge) && phi->opcode == aco_opcode::p_boolean_phi &&
|
|
|
|
|
phi->operands.size() == 2 && phi->operands[1].isConstant()) {
|
aco: Better phi lowering for merge block when else-side is const.
Add a new special case for binary merge blocks to boolean
phi lowerig. This special case benefits shaders that
have divergent branches with an empty else block,
for example all NGG culling shaders.
Fossil DB stats on Rembrandt (NGG culling enabled):
Totals from 61778 (45.79% of 134913) affected shaders:
SpillVGPRs: 2268 -> 2284 (+0.71%); split: -1.10%, +1.81%
CodeSize: 164317952 -> 162962772 (-0.82%); split: -0.83%, +0.00%
Instrs: 31249824 -> 30910686 (-1.09%); split: -1.09%, +0.00%
Latency: 154948555 -> 154781097 (-0.11%); split: -0.12%, +0.02%
InvThroughput: 30397664 -> 30370872 (-0.09%); split: -0.13%, +0.04%
VClause: 529239 -> 529229 (-0.00%); split: -0.00%, +0.00%
SClause: 783417 -> 783430 (+0.00%)
Copies: 2627570 -> 2595161 (-1.23%); split: -1.25%, +0.02%
Branches: 976506 -> 976508 (+0.00%); split: -0.00%, +0.00%
Fossil DB stats on GFX11 (NGG culling disabled):
Totals from 895 (0.66% of 134913) affected shaders:
SpillVGPRs: 2258 -> 2322 (+2.83%); split: -0.44%, +3.28%
CodeSize: 6229152 -> 6215880 (-0.21%); split: -0.37%, +0.16%
Scratch: 216576 -> 215808 (-0.35%); split: -0.47%, +0.12%
Instrs: 1202077 -> 1198396 (-0.31%); split: -0.43%, +0.13%
Latency: 15921336 -> 16000561 (+0.50%); split: -0.74%, +1.24%
InvThroughput: 7425765 -> 7474891 (+0.66%); split: -0.67%, +1.33%
VClause: 22976 -> 23008 (+0.14%); split: -0.03%, +0.17%
SClause: 38269 -> 38271 (+0.01%)
Copies: 123244 -> 123896 (+0.53%); split: -0.30%, +0.83%
Branches: 47570 -> 47574 (+0.01%); split: -0.00%, +0.01%
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21493>
2023-02-21 15:04:40 +01:00
|
|
|
build_const_else_merge_code(program, program->blocks[block->linear_idom], phi);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-08 18:56:43 +02:00
|
|
|
init_state(program, block, state, phi);
|
2020-01-06 15:46:28 +00:00
|
|
|
|
2024-04-09 17:56:29 +02:00
|
|
|
if (phi->opcode == aco_opcode::p_boolean_phi) {
|
|
|
|
|
/* Divergent boolean phis are lowered to logical arithmetic and linear phis. */
|
|
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++)
|
|
|
|
|
build_merge_code(program, state, &program->blocks[block->logical_preds[i]],
|
|
|
|
|
phi->operands[i]);
|
|
|
|
|
}
|
2019-09-17 13:22:17 +02:00
|
|
|
|
2019-10-07 02:32:54 +02:00
|
|
|
unsigned num_preds = block->linear_preds.size();
|
|
|
|
|
if (phi->operands.size() != num_preds) {
|
2024-03-25 15:55:27 +01:00
|
|
|
Instruction* new_phi{
|
|
|
|
|
create_instruction(aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)};
|
2019-10-07 02:32:54 +02:00
|
|
|
new_phi->definitions[0] = phi->definitions[0];
|
|
|
|
|
phi.reset(new_phi);
|
|
|
|
|
} else {
|
|
|
|
|
phi->opcode = aco_opcode::p_linear_phi;
|
|
|
|
|
}
|
|
|
|
|
assert(phi->operands.size() == num_preds);
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < num_preds; i++)
|
2023-06-08 18:56:43 +02:00
|
|
|
phi->operands[i] = state->outputs[block->linear_preds[i]];
|
2019-10-07 02:32:54 +02:00
|
|
|
|
|
|
|
|
return;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
|
2020-04-07 12:16:57 +01:00
|
|
|
void
|
|
|
|
|
lower_subdword_phis(Program* program, Block* block, aco_ptr<Instruction>& phi)
|
|
|
|
|
{
|
|
|
|
|
Builder bld(program);
|
|
|
|
|
for (unsigned i = 0; i < phi->operands.size(); i++) {
|
|
|
|
|
if (phi->operands[i].isUndefined())
|
|
|
|
|
continue;
|
|
|
|
|
if (phi->operands[i].regClass() == phi->definitions[0].regClass())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
assert(phi->operands[i].isTemp());
|
|
|
|
|
Block* pred = &program->blocks[block->logical_preds[i]];
|
|
|
|
|
Temp phi_src = phi->operands[i].getTemp();
|
|
|
|
|
|
|
|
|
|
assert(phi_src.regClass().type() == RegType::sgpr);
|
|
|
|
|
Temp tmp = bld.tmp(RegClass(RegType::vgpr, phi_src.size()));
|
2020-10-15 15:09:20 +01:00
|
|
|
insert_before_logical_end(pred, bld.copy(Definition(tmp), phi_src).get_ptr());
|
2020-04-07 12:16:57 +01:00
|
|
|
Temp new_phi_src = bld.tmp(phi->definitions[0].regClass());
|
2021-07-13 11:22:46 +02:00
|
|
|
insert_before_logical_end(pred, bld.pseudo(aco_opcode::p_extract_vector,
|
|
|
|
|
Definition(new_phi_src), tmp, Operand::zero())
|
|
|
|
|
.get_ptr());
|
2020-04-07 12:16:57 +01:00
|
|
|
|
|
|
|
|
phi->operands[i].setTemp(new_phi_src);
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-15 16:17:29 +02:00
|
|
|
} /* end namespace */
|
|
|
|
|
|
2020-04-07 12:20:52 +01:00
|
|
|
void
|
|
|
|
|
lower_phis(Program* program)
|
2019-09-17 13:22:17 +02:00
|
|
|
{
|
2020-01-06 15:46:28 +00:00
|
|
|
ssa_state state;
|
|
|
|
|
|
2019-09-17 13:22:17 +02:00
|
|
|
for (Block& block : program->blocks) {
|
2019-10-07 02:52:55 +02:00
|
|
|
for (aco_ptr<Instruction>& phi : block.instructions) {
|
2024-04-08 16:15:53 +02:00
|
|
|
if (phi->opcode == aco_opcode::p_boolean_phi) {
|
|
|
|
|
assert(program->wave_size == 64 ? phi->definitions[0].regClass() == s2
|
|
|
|
|
: phi->definitions[0].regClass() == s1);
|
2024-04-09 17:56:29 +02:00
|
|
|
lower_phi_to_linear(program, &state, &block, phi);
|
2024-04-08 16:15:53 +02:00
|
|
|
} else if (phi->opcode == aco_opcode::p_phi) {
|
2024-04-09 17:56:29 +02:00
|
|
|
if (phi->definitions[0].regClass().type() == RegType::sgpr)
|
|
|
|
|
lower_phi_to_linear(program, &state, &block, phi);
|
|
|
|
|
else if (phi->definitions[0].regClass().is_subdword())
|
2020-04-07 12:16:57 +01:00
|
|
|
lower_subdword_phis(program, &block, phi);
|
2019-11-21 12:31:14 +01:00
|
|
|
} else if (!is_phi(phi)) {
|
2019-10-07 02:52:55 +02:00
|
|
|
break;
|
2019-09-17 13:22:17 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace aco
|