aco: don't emit parallelcopy when switching to WQM.

The parallelcopy was only needed because of an RA bug, which was fixed a while ago.
This change fixes some register demand miscalculations.

Totals from 1013 (0.73% of 139391) affected shaders (NAVI10):
CodeSize: 6050408 -> 6047504 (-0.05%); split: -0.05%, +0.00%
Instrs: 1160533 -> 1159765 (-0.07%); split: -0.07%, +0.00%
Cycles: 8027212 -> 8024140 (-0.04%); split: -0.04%, +0.00%
VMEM: 296195 -> 296091 (-0.04%)
SMEM: 73003 -> 73011 (+0.01%); split: +0.05%, -0.04%
SClause: 37221 -> 37222 (+0.00%)
Copies: 70931 -> 70166 (-1.08%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7903>
This commit is contained in:
Daniel Schürmann 2020-08-12 14:52:04 +02:00
parent f815b87e18
commit 2116b4504e

View file

@@ -323,14 +323,7 @@ void transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx)
return;
if (ctx.info[idx].exec.back().second & mask_type_global) {
Temp exec_mask = ctx.info[idx].exec.back().first;
/* TODO: we might generate better code if we pass the uncopied "exec_mask"
* directly to the s_wqm (we still need to keep this parallelcopy for
* potential later uses of exec_mask though). We currently can't do this
* because of a RA bug. */
exec_mask = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm), bld.exec(exec_mask));
ctx.info[idx].exec.back().first = exec_mask;
exec_mask = bld.sop1(Builder::s_wqm, bld.def(bld.lm, exec), bld.def(s1, scc), exec_mask);
exec_mask = bld.sop1(Builder::s_wqm, bld.def(bld.lm, exec), bld.def(s1, scc), bld.exec(exec_mask));
ctx.info[idx].exec.emplace_back(exec_mask, mask_type_global | mask_type_wqm);
return;
}