mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
aco: only create parallelcopy to restore exec at loop exit if needed
The operand isn't fixed to exec, which can mess up the spiller. This also
adds a new situation where a phi is needed.
Fixes dEQP-VK.ssbo.layout.random.descriptor_indexing.2 and an assertion
when compiling a Detroit: Become Human shader.
Fixes: 93c8ebfa ('aco: Initial commit of independent AMD compiler')
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3257>
This commit is contained in:
parent
4537b97410
commit
dba71de5c6
1 changed files with 29 additions and 2 deletions
|
|
@ -487,6 +487,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
||||||
assert(!(block->kind & block_kind_top_level) || info.num_exec_masks <= 2);
|
assert(!(block->kind & block_kind_top_level) || info.num_exec_masks <= 2);
|
||||||
|
|
||||||
/* create the loop exit phis if not trivial */
|
/* create the loop exit phis if not trivial */
|
||||||
|
bool need_parallelcopy = false;
|
||||||
for (unsigned k = 0; k < info.num_exec_masks; k++) {
|
for (unsigned k = 0; k < info.num_exec_masks; k++) {
|
||||||
Temp same = ctx.info[preds[0]].exec[k].first;
|
Temp same = ctx.info[preds[0]].exec[k].first;
|
||||||
uint8_t type = ctx.info[header_preds[0]].exec[k].second;
|
uint8_t type = ctx.info[header_preds[0]].exec[k].second;
|
||||||
|
|
@ -497,12 +498,31 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
||||||
trivial = false;
|
trivial = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (k == info.num_exec_masks - 1u) {
|
||||||
|
bool all_liveout_exec = true;
|
||||||
|
bool all_not_liveout_exec = true;
|
||||||
|
for (unsigned pred : preds) {
|
||||||
|
all_liveout_exec = all_liveout_exec && same == ctx.program->blocks[pred].live_out_exec;
|
||||||
|
all_not_liveout_exec = all_not_liveout_exec && same != ctx.program->blocks[pred].live_out_exec;
|
||||||
|
}
|
||||||
|
if (!all_liveout_exec && !all_not_liveout_exec)
|
||||||
|
trivial = false;
|
||||||
|
else if (all_not_liveout_exec)
|
||||||
|
need_parallelcopy = true;
|
||||||
|
|
||||||
|
need_parallelcopy |= !trivial;
|
||||||
|
}
|
||||||
|
|
||||||
if (trivial) {
|
if (trivial) {
|
||||||
ctx.info[idx].exec.emplace_back(same, type);
|
ctx.info[idx].exec.emplace_back(same, type);
|
||||||
} else {
|
} else {
|
||||||
/* create phi for loop footer */
|
/* create phi for loop footer */
|
||||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||||
phi->definitions[0] = bld.def(bld.lm);
|
phi->definitions[0] = bld.def(bld.lm);
|
||||||
|
if (k == info.num_exec_masks - 1) {
|
||||||
|
phi->definitions[0].setFixed(exec);
|
||||||
|
need_parallelcopy = false;
|
||||||
|
}
|
||||||
for (unsigned i = 0; i < phi->operands.size(); i++)
|
for (unsigned i = 0; i < phi->operands.size(); i++)
|
||||||
phi->operands[i] = Operand(ctx.info[preds[i]].exec[k].first);
|
phi->operands[i] = Operand(ctx.info[preds[i]].exec[k].first);
|
||||||
ctx.info[idx].exec.emplace_back(bld.insert(std::move(phi)), type);
|
ctx.info[idx].exec.emplace_back(bld.insert(std::move(phi)), type);
|
||||||
|
|
@ -533,8 +553,15 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
|
assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
|
||||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
|
if (need_parallelcopy) {
|
||||||
ctx.info[idx].exec.back().first);
|
/* only create this parallelcopy is needed, since the operand isn't
|
||||||
|
* fixed to exec which causes the spiller to miscalculate register demand */
|
||||||
|
/* TODO: Fix register_demand calculation for spilling on loop exits.
|
||||||
|
* The problem is only mitigated because the register demand could be
|
||||||
|
* higher if the exec phi doesn't get assigned to exec. */
|
||||||
|
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
|
||||||
|
ctx.info[idx].exec.back().first);
|
||||||
|
}
|
||||||
|
|
||||||
ctx.loop.pop_back();
|
ctx.loop.pop_back();
|
||||||
return i;
|
return i;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue