mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 18:38:11 +02:00
aco/insert_exec_mask: remove some unnecessary WQM loop handling code
These workarounds are were necessary to prevent infinite loops with helper lane registers containing wrong data. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14951>
This commit is contained in:
parent
580a63b4ac
commit
cbb1b095ca
1 changed files with 4 additions and 102 deletions
|
|
@ -83,7 +83,6 @@ struct block_info {
|
|||
std::vector<WQMState> instr_needs;
|
||||
uint8_t block_needs;
|
||||
uint8_t ever_again_needs;
|
||||
/* more... */
|
||||
};
|
||||
|
||||
struct exec_ctx {
|
||||
|
|
@ -110,16 +109,6 @@ needs_exact(aco_ptr<Instruction>& instr)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
set_needs_wqm(wqm_ctx& ctx, Temp tmp)
|
||||
{
|
||||
if (!ctx.needs_wqm[tmp.id()]) {
|
||||
ctx.needs_wqm[tmp.id()] = true;
|
||||
if (ctx.defined_in[tmp.id()] != 0xFFFF)
|
||||
ctx.worklist.insert(ctx.defined_in[tmp.id()]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
mark_block_wqm(wqm_ctx& ctx, unsigned block_idx)
|
||||
{
|
||||
|
|
@ -146,7 +135,8 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
|
|||
|
||||
WQMState needs = needs_exact(instr) ? Exact : Unspecified;
|
||||
bool propagate_wqm = instr->opcode == aco_opcode::p_wqm;
|
||||
bool pred_by_exec = needs_exec_mask(instr.get());
|
||||
bool pred_by_exec = needs_exec_mask(instr.get()) ||
|
||||
instr->opcode == aco_opcode::p_logical_end;
|
||||
for (const Definition& definition : instr->definitions) {
|
||||
if (!definition.isTemp())
|
||||
continue;
|
||||
|
|
@ -161,13 +151,13 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
|
|||
if (instr->isBranch() && ctx.branch_wqm[block->index]) {
|
||||
assert(!(info.block_needs & Exact_Branch));
|
||||
needs = WQM;
|
||||
propagate_wqm = true;
|
||||
}
|
||||
|
||||
if (propagate_wqm) {
|
||||
needs = pred_by_exec ? WQM : Unspecified;
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.isTemp()) {
|
||||
set_needs_wqm(ctx, op.getTemp());
|
||||
ctx.needs_wqm[op.tempId()] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -175,12 +165,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
|
|||
needs = pred_by_exec ? WQM : Unspecified;
|
||||
}
|
||||
|
||||
if ((instr->opcode == aco_opcode::p_logical_end && ctx.branch_wqm[block->index]) ||
|
||||
instr->opcode == aco_opcode::p_wqm) {
|
||||
assert(needs != Exact);
|
||||
needs = WQM;
|
||||
}
|
||||
|
||||
instr_needs[i] = needs;
|
||||
info.block_needs |= needs;
|
||||
}
|
||||
|
|
@ -194,67 +178,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
|
|||
}
|
||||
}
|
||||
|
||||
/* If an outer loop needs WQM but a nested loop does not, we have to ensure that
|
||||
* the nested loop is done in WQM so that the exec is not empty upon entering
|
||||
* the nested loop.
|
||||
*
|
||||
* TODO: This could be fixed with slightly better code (for loops with divergent
|
||||
* breaks, which might benefit from being in exact) by adding Exact_Branch to a
|
||||
* divergent branch surrounding the nested loop, if such a branch exists.
|
||||
*/
|
||||
void
|
||||
handle_wqm_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
|
||||
{
|
||||
for (unsigned idx = preheader + 1; idx < exec_ctx.program->blocks.size(); idx++) {
|
||||
Block& block = exec_ctx.program->blocks[idx];
|
||||
if (block.kind & block_kind_break)
|
||||
mark_block_wqm(ctx, idx);
|
||||
|
||||
if ((block.kind & block_kind_loop_exit) && block.loop_nest_depth == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If an outer loop and it's nested loops does not need WQM,
|
||||
* add_branch_code() will ensure that it enters in Exact. We have to
|
||||
* ensure that the exact exec mask is not empty by adding Exact_Branch to
|
||||
* the outer divergent branch.
|
||||
*/
|
||||
void
|
||||
handle_exact_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
|
||||
{
|
||||
assert(exec_ctx.program->blocks[preheader + 1].kind & block_kind_loop_header);
|
||||
|
||||
int parent_branch = preheader;
|
||||
unsigned rel_branch_depth = 0;
|
||||
for (; parent_branch >= 0; parent_branch--) {
|
||||
Block& branch = exec_ctx.program->blocks[parent_branch];
|
||||
if (branch.kind & block_kind_branch) {
|
||||
if (rel_branch_depth == 0)
|
||||
break;
|
||||
rel_branch_depth--;
|
||||
}
|
||||
|
||||
/* top-level blocks should never have empty exact exec masks */
|
||||
if (branch.kind & block_kind_top_level)
|
||||
return;
|
||||
|
||||
if (branch.kind & block_kind_merge)
|
||||
rel_branch_depth++;
|
||||
}
|
||||
assert(parent_branch >= 0);
|
||||
|
||||
ASSERTED Block& branch = exec_ctx.program->blocks[parent_branch];
|
||||
assert(branch.kind & block_kind_branch);
|
||||
if (ctx.branch_wqm[parent_branch]) {
|
||||
/* The branch can't be done in Exact because some other blocks in it
|
||||
* are in WQM. So instead, ensure that the loop is done in WQM. */
|
||||
handle_wqm_loops(ctx, exec_ctx, preheader);
|
||||
} else {
|
||||
exec_ctx.info[parent_branch].block_needs |= Exact_Branch;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
calculate_wqm_needs(exec_ctx& exec_ctx)
|
||||
{
|
||||
|
|
@ -266,27 +189,6 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
|
|||
|
||||
Block& block = exec_ctx.program->blocks[block_index];
|
||||
get_block_needs(ctx, exec_ctx, &block);
|
||||
|
||||
/* handle_exact_loops() needs information on outer branches, so don't
|
||||
* handle loops until a top-level block.
|
||||
*/
|
||||
if (block.kind & block_kind_top_level && block.index != exec_ctx.program->blocks.size() - 1) {
|
||||
unsigned preheader = block.index;
|
||||
do {
|
||||
Block& preheader_block = exec_ctx.program->blocks[preheader];
|
||||
if ((preheader_block.kind & block_kind_loop_preheader) &&
|
||||
preheader_block.loop_nest_depth == 0) {
|
||||
/* If the loop or a nested loop needs WQM, branch_wqm will be true for the
|
||||
* preheader.
|
||||
*/
|
||||
if (ctx.branch_wqm[preheader])
|
||||
handle_wqm_loops(ctx, exec_ctx, preheader);
|
||||
else
|
||||
handle_exact_loops(ctx, exec_ctx, preheader);
|
||||
}
|
||||
preheader++;
|
||||
} while (!(exec_ctx.program->blocks[preheader].kind & block_kind_top_level));
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t ever_again_needs = 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue