From 45845ea7f2ba4d7bfa99f9748a9e38f55183188c Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Mon, 20 Apr 2026 13:45:27 -0400
Subject: [PATCH] jay/ra: use accumulator for stride=4 swaps

SIMD16:
Totals:
Instrs: 2767930 -> 2767190 (-0.03%)
CodeSize: 44327408 -> 44312304 (-0.03%); split: -0.04%, +0.00%

Totals from 142 (5.36% of 2647) affected shaders:
Instrs: 658928 -> 658188 (-0.11%)
CodeSize: 10514512 -> 10499408 (-0.14%); split: -0.16%, +0.01%

SIMD32:
Totals:
Instrs: 4884039 -> 4858179 (-0.53%)
CodeSize: 79079008 -> 78651424 (-0.54%); split: -0.54%, +0.00%

Totals from 761 (28.75% of 2647) affected shaders:
Instrs: 3803274 -> 3777414 (-0.68%)
CodeSize: 61707728 -> 61280144 (-0.69%); split: -0.70%, +0.00%

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 .../compiler/jay/jay_register_allocate.c      | 26 +++++++++++++------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/jay/jay_register_allocate.c b/src/intel/compiler/jay/jay_register_allocate.c
index e1ee4211185..ae998521829 100644
--- a/src/intel/compiler/jay/jay_register_allocate.c
+++ b/src/intel/compiler/jay/jay_register_allocate.c
@@ -595,18 +595,28 @@ jay_emit_parallel_copies(jay_builder *b,
       jay_def dst = def_from_reg(copy->dst), src = def_from_reg(copy->src);
       assert(dst.file == src.file);
       enum jay_file file = dst.file;
-      struct jay_temp_regs t = { .gpr = temps.gpr2, .ugpr = temps.ugpr2 };
-      jay_def temp_backing = jay_null();
-      jay_def temp =
-         push_temp(b, temps, file == GPR || file == MEM ? GPR : UGPR,
-                   file == MEM /* stride4 */, true /* outer */,
-                   &temp_backing, dst, src);
-      {
+
+      if (file == GPR &&
+          jay_def_stride(b->shader, dst) == JAY_STRIDE_4 &&
+          jay_def_stride(b->shader, src) == JAY_STRIDE_4) {
+
+         /* If everything is stride=4, swapping is easy */
+         jay_def acc = jay_bare_reg(ACCUM, 2);
+         jay_MOV(b, acc, dst)->type = JAY_TYPE_F32;
+         jay_MOV(b, dst, src)->type = JAY_TYPE_F32;
+         jay_MOV(b, src, acc)->type = JAY_TYPE_F32;
+      } else {
+         struct jay_temp_regs t = { .gpr = temps.gpr2, .ugpr = temps.ugpr2 };
+         jay_def temp_backing = jay_null();
+         jay_def temp =
+            push_temp(b, temps, file == GPR || file == MEM ? GPR : UGPR,
+                      file == MEM /* stride4 */, true /* outer */,
+                      &temp_backing, dst, src);
          mov(b, temp, dst, t);
          mov(b, dst, src, t);
          mov(b, src, temp, t);
+         pop_temp(b, temp, temp_backing);
       }
-      pop_temp(b, temp, temp_backing);
 
       for (unsigned j = 0; j < num_copies; j++) {
          if (pcopies[j].src == copy->dst)