From 3308626e12a9836fb3fd21894ec5eaf54fb053c2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 20 Apr 2026 17:40:42 -0400 Subject: [PATCH] jay/assign_flags: don't burn a flag for ballots Increases GPR pressure somehow but it's obviously the right thing to do. SIMD16: Totals: Instrs: 2767536 -> 2767381 (-0.01%); split: -0.01%, +0.00% CodeSize: 44323392 -> 40075680 (-9.58%); split: -9.58%, +0.00% Totals from 2147 (81.11% of 2647) affected shaders: Instrs: 2704498 -> 2704343 (-0.01%); split: -0.01%, +0.00% CodeSize: 43477568 -> 39229856 (-9.77%); split: -9.77%, +0.00% SIMD32: Totals: Instrs: 4731031 -> 4746775 (+0.33%); split: -0.33%, +0.67% CodeSize: 76609152 -> 70004080 (-8.62%); split: -8.68%, +0.06% Number of spill instructions: 50110 -> 50187 (+0.15%); split: -0.00%, +0.16% Number of fill instructions: 51341 -> 51804 (+0.90%); split: -0.00%, +0.91% Totals from 2136 (80.70% of 2647) affected shaders: Instrs: 4666677 -> 4682421 (+0.34%); split: -0.34%, +0.67% CodeSize: 75735136 -> 69130064 (-8.72%); split: -8.78%, +0.06% Number of spill instructions: 50108 -> 50185 (+0.15%); split: -0.00%, +0.16% Number of fill instructions: 51339 -> 51802 (+0.90%); split: -0.00%, +0.91% Signed-off-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/jay/jay_assign_flags.c | 64 ++++++++++++++++------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/src/intel/compiler/jay/jay_assign_flags.c b/src/intel/compiler/jay/jay_assign_flags.c index 327f5c1afd3..1868d8b3946 100644 --- a/src/intel/compiler/jay/jay_assign_flags.c +++ b/src/intel/compiler/jay/jay_assign_flags.c @@ -3,6 +3,7 @@ * SPDX-License-Identifier: MIT */ +#include "util/bitset.h" #include "jay_builder.h" #include "jay_builder_opcodes.h" #include "jay_ir.h" @@ -46,12 +47,13 @@ static_assert(sizeof(struct var_info) == 1); struct flag_ra { jay_builder *b; + BITSET_WORD *ballot_blocks; + jay_block *block; struct var_info *vars; unsigned nr_vars; uint32_t flag_to_global[JAY_MAX_FLAGS]; uint32_t flag_to_local[JAY_MAX_FLAGS]; unsigned roundrobin; - unsigned ballots:JAY_MAX_FLAGS; }; static jay_def @@ -62,16 +64,25 @@ assign_flag(struct flag_ra *ra, bool ballot, jay_def *tie) { + assert(!ballot || BITSET_TEST(ra->ballot_blocks, ra->block->index)); + jay_def canonical = canonicalize_flag(flag); jay_def tmp = jay_alloc_def(ra->b, file, 1); - /* Dedicate a flag for ballot since uniform access would clobber the zeroing. - * TODO: We could optimize this with more tracking. - */ unsigned num_flags = jay_num_regs(ra->b->shader, FLAG); - tmp.reg = tie ? tie->reg : - ballot ? 0 : - (1 + ((ra->roundrobin++) % (num_flags - 1))); + tmp.reg = tie ? tie->reg : ballot ? 0 : ((ra->roundrobin++) % num_flags); + + /* Uniform access (via a UFLAG or an inverse-ballot) would clobber the zero + * for a ballot. We could refine this further but this should be ok for now. + */ + if (!ballot && + tmp.reg == 0 && + BITSET_TEST(ra->ballot_blocks, ra->block->index)) { + + assert(!tie); + tmp.reg = 1; + ra->roundrobin++; + } if (jay_index(canonical) < ra->nr_vars) { ra->vars[jay_index(canonical)] = (struct var_info) { @@ -83,11 +94,6 @@ assign_flag(struct flag_ra *ra, ra->flag_to_global[tmp.reg] = jay_index(canonical); ra->flag_to_local[tmp.reg] = jay_index(tmp); - - if (ballot) { - ra->ballots |= BITFIELD_BIT(tmp.reg); - } - return tmp; } @@ -172,11 +178,11 @@ rewrite_without_flag(struct flag_ra *ra, jay_inst *I, unsigned s, bool in_flag) } static void -assign_block(struct flag_ra *ra, jay_block *block) +assign_block(struct flag_ra *ra) { jay_builder *b = ra->b; - jay_foreach_inst_in_block_safe(block, I) { + jay_foreach_inst_in_block_safe(ra->block, I) { if (I->op == JAY_OPCODE_CAST_CANONICAL_TO_FLAG) { /* Assume the source is already 0/~0 canonical and use it. */ I->op = JAY_OPCODE_MOV; @@ -312,10 +318,10 @@ assign_block(struct flag_ra *ra, jay_block *block) } } - /* Ballots require zeroing flags */ - b->cursor = jay_before_block(block); - u_foreach_bit(i, ra->ballots) { - jay_ZERO_FLAG(b, i); + /* Ballots require zeroing the ballot flag (f0) */ + b->cursor = jay_before_block(ra->block); + if (BITSET_TEST(ra->ballot_blocks, ra->block->index)) { + jay_ZERO_FLAG(b, 0); } } @@ -359,6 +365,7 @@ jay_assign_flags(jay_shader *s) uint32_t nr_vars = f->ssa_alloc; struct var_info *map = calloc(nr_vars, sizeof(map[0])); uint32_t *def_to_block = calloc(nr_vars, sizeof(def_to_block)); + BITSET_WORD *ballot_blocks = BITSET_CALLOC(f->num_blocks); jay_foreach_inst_in_func(f, block, I) { if (!jay_is_null(I->cond_flag)) { @@ -371,12 +378,29 @@ jay_assign_flags(jay_shader *s) map[jay_index(predicate)].read_by_predication = true; } } + + jay_foreach_src(I, s) { + if (jay_is_flag(I->src[s]) && + jay_src_type(I, s) != JAY_TYPE_U1 && + s < I->num_srcs - I->predication) { + + assert(block->index < f->num_blocks); + BITSET_SET(ballot_blocks, block->index); + } + } } jay_foreach_block(f, block) { jay_builder b = { .shader = f->shader, .func = f }; - struct flag_ra ra = { .b = &b, .vars = map, .nr_vars = nr_vars }; - assign_block(&ra, block); + struct flag_ra ra = { + .b = &b, + .vars = map, + .nr_vars = nr_vars, + .ballot_blocks = ballot_blocks, + .block = block, + }; + + assign_block(&ra); } free(map);