aco/insert_exec_mask: Reduce latency when switching to WQM.

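Change pattern:
s_mov_b64 s[0:1], exec         s_mov_b64 s[0:1], exec
s_wqm_b64 exec, s[0:1]   ->    s_wqm_b64 exec, exec

s_wqm_b64 now reads exec directly instead of the copy in s[0:1], so it no longer depends on the result of the preceding s_mov_b64.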

Totals from 16667 (21.03% of 79242) affected shaders: (GFX11)

Instrs: 11317502 -> 11307484 (-0.09%); split: -0.09%, +0.00%
CodeSize: 60194272 -> 60155088 (-0.07%); split: -0.07%, +0.00%
Latency: 94345873 -> 94338374 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 13568872 -> 13568683 (-0.00%); split: -0.00%, +0.00%
Copies: 808334 -> 808332 (-0.00%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27112>
This commit is contained in:
Daniel Schürmann 2024-01-30 18:26:33 +01:00 committed by Marge Bot
parent e89977ff71
commit 4fa27845e5

View file

@@ -121,10 +121,8 @@ transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx)
return;
if (ctx.info[idx].exec.back().second & mask_type_global) {
Operand exec_mask = ctx.info[idx].exec.back().first;
if (exec_mask.isUndefined()) {
exec_mask = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
ctx.info[idx].exec.back().first = exec_mask;
}
if (exec_mask.isUndefined())
ctx.info[idx].exec.back().first = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
exec_mask = bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc),
get_exec_op(exec_mask));