aco/insert_exec_mask: remove some unnecessary WQM loop handling code

These workarounds are were necessary to prevent infinite loops
with helper lane registers containing wrong data.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14951>
This commit is contained in:
Daniel Schürmann 2022-01-26 11:08:42 +01:00 committed by Marge Bot
parent 580a63b4ac
commit cbb1b095ca

View file

@ -83,7 +83,6 @@ struct block_info {
std::vector<WQMState> instr_needs;
uint8_t block_needs;
uint8_t ever_again_needs;
/* more... */
};
struct exec_ctx {
@ -110,16 +109,6 @@ needs_exact(aco_ptr<Instruction>& instr)
}
}
void
set_needs_wqm(wqm_ctx& ctx, Temp tmp)
{
if (!ctx.needs_wqm[tmp.id()]) {
ctx.needs_wqm[tmp.id()] = true;
if (ctx.defined_in[tmp.id()] != 0xFFFF)
ctx.worklist.insert(ctx.defined_in[tmp.id()]);
}
}
void
mark_block_wqm(wqm_ctx& ctx, unsigned block_idx)
{
@ -146,7 +135,8 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
WQMState needs = needs_exact(instr) ? Exact : Unspecified;
bool propagate_wqm = instr->opcode == aco_opcode::p_wqm;
bool pred_by_exec = needs_exec_mask(instr.get());
bool pred_by_exec = needs_exec_mask(instr.get()) ||
instr->opcode == aco_opcode::p_logical_end;
for (const Definition& definition : instr->definitions) {
if (!definition.isTemp())
continue;
@ -161,13 +151,13 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
if (instr->isBranch() && ctx.branch_wqm[block->index]) {
assert(!(info.block_needs & Exact_Branch));
needs = WQM;
propagate_wqm = true;
}
if (propagate_wqm) {
needs = pred_by_exec ? WQM : Unspecified;
for (const Operand& op : instr->operands) {
if (op.isTemp()) {
set_needs_wqm(ctx, op.getTemp());
ctx.needs_wqm[op.tempId()] = true;
}
}
}
@ -175,12 +165,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
needs = pred_by_exec ? WQM : Unspecified;
}
if ((instr->opcode == aco_opcode::p_logical_end && ctx.branch_wqm[block->index]) ||
instr->opcode == aco_opcode::p_wqm) {
assert(needs != Exact);
needs = WQM;
}
instr_needs[i] = needs;
info.block_needs |= needs;
}
@ -194,67 +178,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
}
}
/* If an outer loop needs WQM but a nested loop does not, we have to ensure that
* the nested loop is done in WQM so that the exec is not empty upon entering
* the nested loop.
*
* TODO: This could be fixed with slightly better code (for loops with divergent
* breaks, which might benefit from being in exact) by adding Exact_Branch to a
* divergent branch surrounding the nested loop, if such a branch exists.
*/
void
handle_wqm_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
{
for (unsigned idx = preheader + 1; idx < exec_ctx.program->blocks.size(); idx++) {
Block& block = exec_ctx.program->blocks[idx];
if (block.kind & block_kind_break)
mark_block_wqm(ctx, idx);
if ((block.kind & block_kind_loop_exit) && block.loop_nest_depth == 0)
break;
}
}
/* If an outer loop and it's nested loops does not need WQM,
* add_branch_code() will ensure that it enters in Exact. We have to
* ensure that the exact exec mask is not empty by adding Exact_Branch to
* the outer divergent branch.
*/
void
handle_exact_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
{
assert(exec_ctx.program->blocks[preheader + 1].kind & block_kind_loop_header);
int parent_branch = preheader;
unsigned rel_branch_depth = 0;
for (; parent_branch >= 0; parent_branch--) {
Block& branch = exec_ctx.program->blocks[parent_branch];
if (branch.kind & block_kind_branch) {
if (rel_branch_depth == 0)
break;
rel_branch_depth--;
}
/* top-level blocks should never have empty exact exec masks */
if (branch.kind & block_kind_top_level)
return;
if (branch.kind & block_kind_merge)
rel_branch_depth++;
}
assert(parent_branch >= 0);
ASSERTED Block& branch = exec_ctx.program->blocks[parent_branch];
assert(branch.kind & block_kind_branch);
if (ctx.branch_wqm[parent_branch]) {
/* The branch can't be done in Exact because some other blocks in it
* are in WQM. So instead, ensure that the loop is done in WQM. */
handle_wqm_loops(ctx, exec_ctx, preheader);
} else {
exec_ctx.info[parent_branch].block_needs |= Exact_Branch;
}
}
void
calculate_wqm_needs(exec_ctx& exec_ctx)
{
@ -266,27 +189,6 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
Block& block = exec_ctx.program->blocks[block_index];
get_block_needs(ctx, exec_ctx, &block);
/* handle_exact_loops() needs information on outer branches, so don't
* handle loops until a top-level block.
*/
if (block.kind & block_kind_top_level && block.index != exec_ctx.program->blocks.size() - 1) {
unsigned preheader = block.index;
do {
Block& preheader_block = exec_ctx.program->blocks[preheader];
if ((preheader_block.kind & block_kind_loop_preheader) &&
preheader_block.loop_nest_depth == 0) {
/* If the loop or a nested loop needs WQM, branch_wqm will be true for the
* preheader.
*/
if (ctx.branch_wqm[preheader])
handle_wqm_loops(ctx, exec_ctx, preheader);
else
handle_exact_loops(ctx, exec_ctx, preheader);
}
preheader++;
} while (!(exec_ctx.program->blocks[preheader].kind & block_kind_top_level));
}
}
uint8_t ever_again_needs = 0;