aco/lower_phis: generalize init_state() so that it works with any scalar phis

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28661>
This commit is contained in:
Daniel Schürmann 2024-04-09 14:39:34 +02:00 committed by Marge Bot
parent 55130069b8
commit f2d32e1c13

View file

@ -26,6 +26,7 @@ MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(pred_defined);
struct ssa_state {
unsigned loop_nest_depth;
RegClass rc;
std::vector<pred_defined> any_pred_defined;
std::vector<bool> visited;
@ -37,7 +38,7 @@ Operand get_output(Program* program, unsigned block_idx, ssa_state* state);
void
init_outputs(Program* program, ssa_state* state, unsigned start, unsigned end)
{
for (unsigned i = start; i < end; ++i) {
for (unsigned i = start; i <= end; ++i) {
if (state->visited[i])
continue;
state->outputs[i] = get_output(program, i, state);
@ -51,11 +52,11 @@ get_output(Program* program, unsigned block_idx, ssa_state* state)
Block& block = program->blocks[block_idx];
if (state->any_pred_defined[block_idx] == pred_defined::undef)
return Operand(program->lane_mask);
return Operand(state->rc);
if (block.loop_nest_depth < state->loop_nest_depth)
/* loop-carried value for loop exit phis */
return Operand::zero(program->lane_mask.bytes());
return Operand::zero(state->rc.bytes());
size_t num_preds = block.linear_preds.size();
@ -73,9 +74,9 @@ get_output(Program* program, unsigned block_idx, ssa_state* state)
* to ensure the allocated temporary is defined. */
if (block.kind & block_kind_loop_header) {
unsigned start_idx = block_idx + 1;
unsigned end_idx = block.linear_preds.back() + 1;
unsigned end_idx = block.linear_preds.back();
state->outputs[block_idx] = Operand(Temp(program->allocateTmp(program->lane_mask)));
state->outputs[block_idx] = Operand(Temp(program->allocateTmp(state->rc)));
init_outputs(program, state, start_idx, end_idx);
output = state->outputs[block_idx];
} else if (std::all_of(block.linear_preds.begin() + 1, block.linear_preds.end(),
@ -84,7 +85,7 @@ get_output(Program* program, unsigned block_idx, ssa_state* state)
})) {
return state->outputs[block.linear_preds[0]];
} else {
output = Operand(Temp(program->allocateTmp(program->lane_mask)));
output = Operand(Temp(program->allocateTmp(state->rc)));
}
/* create phi */
@ -95,7 +96,7 @@ get_output(Program* program, unsigned block_idx, ssa_state* state)
phi->definitions[0] = Definition(output.getTemp());
block.instructions.emplace(block.instructions.begin(), std::move(phi));
assert(output.size() == program->lane_mask.size());
assert(output.size() == state->rc.size());
return output;
}
@ -122,7 +123,7 @@ build_merge_code(Program* program, ssa_state* state, Block* block, Operand cur)
Definition dst = Definition(state->outputs[block_idx].getTemp());
Operand prev = get_output(program, block_idx, state);
if (cur.isUndefined())
cur = Operand::zero(program->lane_mask.bytes());
return;
Builder bld(program);
auto IsLogicalEnd = [](const aco_ptr<Instruction>& instr) -> bool
@ -223,6 +224,7 @@ init_state(Program* program, Block* block, ssa_state* state, aco_ptr<Instruction
Builder bld(program);
/* do this here to avoid resizing in case of no boolean phis */
state->rc = phi->definitions[0].regClass();
state->visited.resize(program->blocks.size());
state->outputs.resize(program->blocks.size());
state->any_pred_defined.resize(program->blocks.size());
@ -236,45 +238,37 @@ init_state(Program* program, Block* block, ssa_state* state, aco_ptr<Instruction
if (phi->operands[i].isUndefined())
continue;
pred_defined defined = pred_defined::temp;
if (phi->operands[i].isConstant())
if (phi->operands[i].isConstant() && phi->opcode == aco_opcode::p_boolean_phi)
defined = phi->operands[i].constantValue() ? pred_defined::const_1 : pred_defined::const_0;
for (unsigned succ : program->blocks[block->logical_preds[i]].linear_succs)
state->any_pred_defined[succ] |= defined;
}
unsigned start = block->logical_preds[0];
unsigned end = block->index;
unsigned end = block->linear_preds.back();
/* for loop exit phis, start at the loop pre-header */
if (block->kind & block_kind_loop_exit) {
/* For boolean loop exit phis, start at the loop pre-header */
if (block->kind & block_kind_loop_exit && phi->opcode == aco_opcode::p_boolean_phi) {
while (program->blocks[start].loop_nest_depth >= state->loop_nest_depth)
start--;
end = block->index - 1;
/* If the loop-header has a back-edge, we need to insert a phi.
* This will contain a defined value */
if (program->blocks[start + 1].linear_preds.size() > 1)
state->any_pred_defined[start + 1] = pred_defined::temp;
}
/* for loop header phis, end at the loop exit */
if (block->kind & block_kind_loop_header) {
while (program->blocks[end].loop_nest_depth >= state->loop_nest_depth)
end++;
/* don't propagate the incoming value */
state->any_pred_defined[block->index] = pred_defined::undef;
}
/* add dominating zero: this allows to emit simpler merge sequences
* if we can ensure that all disabled lanes are always zero on incoming values */
// TODO: find more occasions where pred_defined::zero is beneficial (e.g. with 2+ temp merges)
if (block->kind & block_kind_loop_exit) {
/* zero the loop-carried variable */
if (program->blocks[start + 1].linear_preds.size() > 1) {
state->any_pred_defined[start + 1] |= pred_defined::zero;
// TODO: emit this zero explicitly
state->any_pred_defined[start + 1] = pred_defined::temp | pred_defined::zero;
/* add dominating zero: this allows to emit simpler merge sequences
* if we can ensure that all disabled lanes are always zero on incoming values
*/
state->any_pred_defined[start] = pred_defined::const_0;
}
}
for (unsigned j = start; j < end; j++) {
/* For loop header phis, don't propagate the incoming value */
if (block->kind & block_kind_loop_header) {
state->any_pred_defined[block->index] = pred_defined::undef;
}
for (unsigned j = start; j <= end; j++) {
if (state->any_pred_defined[j] == pred_defined::undef)
continue;
for (unsigned succ : program->blocks[j].linear_succs)
@ -284,12 +278,19 @@ init_state(Program* program, Block* block, ssa_state* state, aco_ptr<Instruction
state->any_pred_defined[block->index] = pred_defined::undef;
for (unsigned i = 0; i < phi->operands.size(); i++) {
/* If the Operand is undefined, just propagate the previous value. */
if (phi->operands[i].isUndefined())
continue;
unsigned pred = block->logical_preds[i];
if (state->any_pred_defined[pred] != pred_defined::undef)
state->outputs[pred] = Operand(bld.tmp(bld.lm));
else
if (phi->opcode == aco_opcode::p_boolean_phi &&
state->any_pred_defined[pred] != pred_defined::undef) {
/* Needs merge code sequence. */
state->outputs[pred] = Operand(bld.tmp(state->rc));
} else {
state->outputs[pred] = phi->operands[i];
assert(state->outputs[pred].size() == bld.lm.size());
}
assert(state->outputs[pred].size() == state->rc.size());
state->visited[pred] = true;
}