jay/ra: use accumulator for stride=4 swaps

SIMD16:

   Totals:
   Instrs: 2767930 -> 2767190 (-0.03%)
   CodeSize: 44327408 -> 44312304 (-0.03%); split: -0.04%, +0.00%

   Totals from 142 (5.36% of 2647) affected shaders:
   Instrs: 658928 -> 658188 (-0.11%)
   CodeSize: 10514512 -> 10499408 (-0.14%); split: -0.16%, +0.01%

SIMD32:

   Totals:
   Instrs: 4884039 -> 4858179 (-0.53%)
   CodeSize: 79079008 -> 78651424 (-0.54%); split: -0.54%, +0.00%

   Totals from 761 (28.75% of 2647) affected shaders:
   Instrs: 3803274 -> 3777414 (-0.68%)
   CodeSize: 61707728 -> 61280144 (-0.69%); split: -0.70%, +0.00%

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41064>
This commit is contained in:
Alyssa Rosenzweig 2026-04-20 13:45:27 -04:00 committed by Marge Bot
parent 489f883277
commit 45845ea7f2

View file

@@ -595,18 +595,28 @@ jay_emit_parallel_copies(jay_builder *b,
jay_def dst = def_from_reg(copy->dst), src = def_from_reg(copy->src);
assert(dst.file == src.file);
enum jay_file file = dst.file;
struct jay_temp_regs t = { .gpr = temps.gpr2, .ugpr = temps.ugpr2 };
jay_def temp_backing = jay_null();
jay_def temp =
push_temp(b, temps, file == GPR || file == MEM ? GPR : UGPR,
file == MEM /* stride4 */, true /* outer */,
&temp_backing, dst, src);
{
if (file == GPR &&
jay_def_stride(b->shader, dst) == JAY_STRIDE_4 &&
jay_def_stride(b->shader, src) == JAY_STRIDE_4) {
/* If everything is stride=4, swapping is easy */
jay_def acc = jay_bare_reg(ACCUM, 2);
jay_MOV(b, acc, dst)->type = JAY_TYPE_F32;
jay_MOV(b, dst, src)->type = JAY_TYPE_F32;
jay_MOV(b, src, acc)->type = JAY_TYPE_F32;
} else {
struct jay_temp_regs t = { .gpr = temps.gpr2, .ugpr = temps.ugpr2 };
jay_def temp_backing = jay_null();
jay_def temp =
push_temp(b, temps, file == GPR || file == MEM ? GPR : UGPR,
file == MEM /* stride4 */, true /* outer */,
&temp_backing, dst, src);
mov(b, temp, dst, t);
mov(b, dst, src, t);
mov(b, src, temp, t);
pop_temp(b, temp, temp_backing);
}
pop_temp(b, temp, temp_backing);
for (unsigned j = 0; j < num_copies; j++) {
if (pcopies[j].src == copy->dst)