From f98e2b24bca4566a30cc76f642ee9457c1e592f6 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 15 Apr 2026 10:57:38 -0400 Subject: [PATCH] jay: fix the source pinning code I was just trying to get rid of the loop but it also generates better code. Totals: Instrs: 2806469 -> 2798036 (-0.30%); split: -0.33%, +0.02% CodeSize: 44950448 -> 44815024 (-0.30%); split: -0.32%, +0.02% Totals from 143 (5.40% of 2647) affected shaders: Instrs: 665554 -> 657121 (-1.27%); split: -1.37%, +0.10% CodeSize: 10611344 -> 10475920 (-1.28%); split: -1.37%, +0.10% Signed-off-by: Alyssa Rosenzweig Part-of: --- .../compiler/jay/jay_register_allocate.c | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/intel/compiler/jay/jay_register_allocate.c b/src/intel/compiler/jay/jay_register_allocate.c index c8b1096b66d..395303e4928 100644 --- a/src/intel/compiler/jay/jay_register_allocate.c +++ b/src/intel/compiler/jay/jay_register_allocate.c @@ -326,12 +326,14 @@ typedef struct jay_ra_state { BITSET_WORD *available_regs[JAY_NUM_RA_FILES]; /** - * Within assign_regs_for_inst, the set of registers that have respectively - * been 1. assigned and therefore pinned; 2. the base of a killed source. + * Within assign_regs_for_inst, the set of registers that are respectively + * 1. assigned and therefore pinned; 2. the base of a killed source; 3. used + * as sources not yet processed. * * Invariant: zeroed on entry to assign_regs_for_inst. */ - BITSET_WORD *pinned[JAY_NUM_RA_FILES], *killed[JAY_NUM_RA_FILES]; + BITSET_WORD *pinned[JAY_NUM_RA_FILES], *killed[JAY_NUM_RA_FILES], + *sources[JAY_NUM_RA_FILES]; /** Vector affinities for each def. */ struct affinity *affinities; @@ -853,17 +855,9 @@ pick_regs(jay_ra_state *ra, } /* Choosing this register will pin it, leaving it unavailable to later - * smaller sources which will need to be shuffled. Account for those - * moves. - * - * TODO: Faster algorithm. + * smaller sources which will need a move. */ - jay_foreach_src_index(I, s, c, index) { - if (jay_num_values(I->src[s]) < size && - ra->reg_for_index[index] == make_reg(file, i)) { - cost++; - } - } + cost += BITSET_TEST(ra->sources[file], i); } if (cost < best_cost) { @@ -927,7 +921,7 @@ assign_regs_for_inst(jay_ra_state *ra, jay_inst *I) */ jay_foreach_index(I->src[s], _, index) { jay_reg reg = current_reg(ra, index); - assert(reg != NO_REG); + BITSET_SET(ra->sources[r_file(reg)], r_reg(reg)); eviction_indices[nr_copies] = index; copies[nr_copies++] = (struct jay_parallel_copy) { .src = reg }; @@ -1003,6 +997,10 @@ assign_regs_for_inst(jay_ra_state *ra, jay_inst *I) break; } } + + jay_foreach_index(var, c, index) { + BITSET_CLEAR(ra->sources[file], r_reg(ra->reg_for_index[index])); + } } else { alignment = MAX2(alignment, jay_dst_alignment(shader, I)); min_stride = jay_dst_stride_minmax(I, false); @@ -1606,6 +1604,7 @@ jay_register_allocate_function(jay_function *f) ra.available_regs[file] = BITSET_LINEAR_ZALLOC(lin_ctx, num_regs); ra.pinned[file] = BITSET_LINEAR_ZALLOC(lin_ctx, num_regs); ra.killed[file] = BITSET_LINEAR_ZALLOC(lin_ctx, num_regs); + ra.sources[file] = BITSET_LINEAR_ZALLOC(lin_ctx, num_regs); } ra.phi_web = linear_alloc_array(lin_ctx, struct phi_web_node, f->ssa_alloc);