From 820e3a94030bf2bab3333f8585bb900d9572c912 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 13 Apr 2026 13:49:29 -0400 Subject: [PATCH] jay: roundrobin RA Totals: Instrs: 2827788 -> 2810815 (-0.60%); split: -1.12%, +0.52% CodeSize: 45449488 -> 45101440 (-0.77%); split: -1.28%, +0.51% Number of spill instructions: 1984 -> 1982 (-0.10%) Number of fill instructions: 2272 -> 2270 (-0.09%) Totals from 2449 (92.52% of 2647) affected shaders: Instrs: 2818824 -> 2801851 (-0.60%); split: -1.13%, +0.53% CodeSize: 45314880 -> 44966832 (-0.77%); split: -1.28%, +0.51% Number of spill instructions: 1984 -> 1982 (-0.10%) Number of fill instructions: 2272 -> 2270 (-0.09%) Signed-off-by: Alyssa Rosenzweig Part-of: --- .../compiler/jay/jay_register_allocate.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/jay/jay_register_allocate.c b/src/intel/compiler/jay/jay_register_allocate.c index 65cbf05c080..561581fb943 100644 --- a/src/intel/compiler/jay/jay_register_allocate.c +++ b/src/intel/compiler/jay/jay_register_allocate.c @@ -288,6 +288,9 @@ typedef struct jay_ra_state { /** Size of each register file */ unsigned num_regs[JAY_NUM_RA_FILES]; + /** Counter for roundrobin register allocation */ + unsigned roundrobin[JAY_NUM_RA_FILES]; + /** First GPR that may be used for EOT sends */ unsigned eot_offs; @@ -764,8 +767,23 @@ pick_regs(jay_ra_state *ra, ra->phi_web[phi_web_find(ra->phi_web, jay_channel(var, 0))].affinity; assert(alignment >= size && "alignment must be a multiple of size"); + unsigned nr = DIV_ROUND_UP((end + 1 - size - first), alignment); + unsigned roundrobin = (ra->roundrobin[file]++) % nr; + unsigned rr_al = roundrobin * alignment, nr_al = nr * alignment; + + for (unsigned i = rr_al; i < rr_al + nr_al; i += alignment) { + /* We select registers roundrobin. This has several benefits: + * + * 1. 
Easier coalescing since we are statistically less likely to allocate + * a register that a future instruction has an affinity for. + * + * 2. More freedom for post-RA scheduling thanks to fewer dependencies. + * + * 3. Less stalling due to SWSB annotations from register reuse. + */ + unsigned r = first + (i >= nr_al ? (i - nr_al) : i); + assert(r >= first && r + size <= end); - for (unsigned r = first; r + size <= end; r += alignment) { unsigned cost = 0; bool tied = last_killed && last_killed->reg == r; enum jay_stride stride =