From ad040f2fbb1b7d8b12e0a687f8837d90e8f79d07 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 22 Apr 2026 10:53:46 -0400 Subject: [PATCH] jay: introduce a physical control flow graph Consider: u0 = foo() if (divergent) { u0 = bar() r0 = baz(u0) } else { r0 = quux(u0) } Logically, this is fine, there is no interference between bar() and u0. But physically, both sides of the if execute so the bar() write to u0 overwrites the variable the else reads. So this is a miscompile. The solution is to model the extra edges in the physical control flow graph, which lives next to the existing logical control flow graph. Liveness for UGPRs now follows the physical CFG, while liveness for GPRs continues to follow the logical CFG. That models the interference properly, while still allowing phis to work as before (since phis writing UGPRs follow uniform bits of control flow that are necessarily critical edge free for the same reason the logical CFG is). Because our RA copies shuffled registers back at block ends (following Colombet), there's no issue with live range splits here (unlike aco which inserts phis for this case and then needs to worry about critical edges around those phis). There might still be an extremely-challenging-to-hit bug here with UGPR spilling which I need to think more about. It might be fine as-is? Not convinced though. But this is big enough and strictly less broken than what we have right now and the full solution will build on this, so here we are. Fixes artefating in SuperTuxKart and Celestia knows what else. Totals: Instrs: 2770938 -> 2771269 (+0.01%); split: -0.00%, +0.02% CodeSize: 40133712 -> 40138480 (+0.01%); split: -0.01%, +0.02% Totals from 158 (5.97% of 2647) affected shaders: Instrs: 514523 -> 514854 (+0.06%); split: -0.02%, +0.09% CodeSize: 7603040 -> 7607808 (+0.06%); split: -0.03%, +0.09% Signed-off-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/jay/jay_builder.h | 12 ++- src/intel/compiler/jay/jay_from_nir.c | 47 ++++++---- src/intel/compiler/jay/jay_insert_fp_mode.c | 2 +- src/intel/compiler/jay/jay_ir.h | 93 +++++++++++++------ src/intel/compiler/jay/jay_liveness.c | 27 +++++- src/intel/compiler/jay/jay_lower_spill.c | 2 +- src/intel/compiler/jay/jay_opt_control_flow.c | 4 +- src/intel/compiler/jay/jay_print.c | 20 ++-- src/intel/compiler/jay/jay_repair_ssa.c | 18 ++-- src/intel/compiler/jay/jay_spill.c | 24 ++--- src/intel/compiler/jay/jay_validate.c | 23 +++-- src/intel/compiler/jay/jay_validate_ra.c | 18 ++-- .../compiler/jay/test/test-repair-ssa.cpp | 28 +++--- 13 files changed, 206 insertions(+), 112 deletions(-) diff --git a/src/intel/compiler/jay/jay_builder.h b/src/intel/compiler/jay/jay_builder.h index b0e401fc7a3..2f4f768dd25 100644 --- a/src/intel/compiler/jay/jay_builder.h +++ b/src/intel/compiler/jay/jay_builder.h @@ -106,10 +106,12 @@ jay_before_function(jay_function *f) * have other predecessorss since there are no critical edges. */ static inline jay_block * -jay_edge_to_block(jay_block *pred, jay_block *succ) +jay_edge_to_block(jay_block *pred, jay_block *succ, enum jay_file file) { - assert(jay_num_successors(pred) == 1 || jay_num_predecessors(succ) == 1); - return jay_num_successors(pred) == 1 ? pred : succ; + assert(jay_num_successors(pred, file) == 1 || + jay_num_predecessors(succ, file) == 1); + + return jay_num_successors(pred, file) == 1 ? pred : succ; } /* @@ -118,9 +120,9 @@ jay_edge_to_block(jay_block *pred, jay_block *succ) * flow graph having no critical edges. */ static inline jay_cursor -jay_along_edge(jay_block *pred, jay_block *succ) +jay_along_edge(jay_block *pred, jay_block *succ, enum jay_file file) { - jay_block *to = jay_edge_to_block(pred, succ); + jay_block *to = jay_edge_to_block(pred, succ, file); if (to == pred) return jay_after_block_logical(pred); diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index 9c6d773f98b..c1fa3d1990c 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -2042,7 +2042,7 @@ jay_emit_jump(struct nir_to_jay_state *nj, nir_jump_instr *instr) { switch (instr->type) { case nir_jump_break: - jay_block_add_successor(nj->current_block, nj->break_block); + jay_block_add_successor(nj->current_block, nj->break_block, GPR); jay_BREAK(&nj->bld); break; case nir_jump_halt: @@ -2144,14 +2144,22 @@ jay_emit_if(struct nir_to_jay_state *nj, nir_if *nif) /* Pop */ --nj->indent; - jay_block_add_successor(before_block, then_first); - jay_block_add_successor(before_block, else_first); + bool uniform = jay_is_uniform(condition); + + /* Logical CFG edges */ + jay_block_add_successor(before_block, then_first, GPR); + jay_block_add_successor(before_block, else_first, GPR); if (!jay_block_ending_unconditional_jump(then_last)) - jay_block_add_successor(then_last, after_block); + jay_block_add_successor(then_last, after_block, GPR); if (!jay_block_ending_unconditional_jump(else_last)) - jay_block_add_successor(else_last, after_block); + jay_block_add_successor(else_last, after_block, GPR); + + /* For a non-uniform IF, we fall through both sides in the physical CFG */ + if (!uniform) { + jay_block_add_successor(then_last, else_first, UGPR); + } nj->after_block = after_block; @@ -2183,7 +2191,7 @@ jay_emit_loop(struct nir_to_jay_state *nj, nir_loop *nloop) loop_header->loop_header = true; /* The current block falls through to the start of the loop */ - jay_block_add_successor(nj->current_block, loop_header); + jay_block_add_successor(nj->current_block, loop_header, GPR); /* Emit the loop body */ nj->after_block = loop_header; @@ -2194,7 +2202,7 @@ jay_emit_loop(struct nir_to_jay_state *nj, nir_loop *nloop) if (jump && jump->op == JAY_OPCODE_BREAK) { jump->op = JAY_OPCODE_LOOP_ONCE; } else { - jay_block_add_successor(nj->current_block, loop_header); + jay_block_add_successor(nj->current_block, loop_header, GPR); jay_WHILE(b); } @@ -2508,9 +2516,9 @@ jay_setup_payload(struct nir_to_jay_state *nj) } /* - * NIR sometimes contains unreachable blocks (e.g. due to infinite loops). These - * blocks have no predecessors, but do have successors and can contribute to - * phis. They are dead and violate the IR invariant: + * NIR sometimes contains logically unreachable blocks (e.g. due to infinite + * loops). These blocks have no predecessors, but do have successors and can + * contribute to phis. They are dead and violate the IR invariant: * * Live-in sources are live-out in all predecessors. * @@ -2530,16 +2538,23 @@ jay_remove_unreachable_blocks(jay_function *func) jay_foreach_block(func, pred) { if (pred != jay_first_block(func) && - jay_num_predecessors(pred) == 0 && - jay_num_successors(pred) > 0) { + jay_num_predecessors(pred, GPR) == 0 && + jay_num_successors(pred, GPR) > 0) { - jay_foreach_successor(pred, succ) { - util_dynarray_delete_unordered(&succ->predecessors, jay_block *, + jay_foreach_successor(pred, succ, GPR) { + util_dynarray_delete_unordered(&succ->logical_preds, jay_block *, pred); } - pred->successors[0] = NULL; - pred->successors[1] = NULL; + jay_foreach_successor(pred, succ, UGPR) { + util_dynarray_delete_unordered(&succ->physical_preds, + jay_block *, pred); + } + + pred->logical_succs[0] = NULL; + pred->logical_succs[1] = NULL; + pred->physical_succs[0] = NULL; + pred->physical_succs[1] = NULL; progress = true; } } diff --git a/src/intel/compiler/jay/jay_insert_fp_mode.c b/src/intel/compiler/jay/jay_insert_fp_mode.c index a51c974ad3f..fde92477f44 100644 --- a/src/intel/compiler/jay/jay_insert_fp_mode.c +++ b/src/intel/compiler/jay/jay_insert_fp_mode.c @@ -79,7 +79,7 @@ jay_insert_fp_mode(jay_shader *shader, uint32_t api, uint32_t float_sizes) } /* Restore to global state on block boundaries */ - if (jay_num_successors(block) > 0) { + if (jay_num_successors(block, GPR) > 0) { set_cr0(func, jay_after_block(block), ¤t, cr0); } } diff --git a/src/intel/compiler/jay/jay_ir.h b/src/intel/compiler/jay/jay_ir.h index 524e4b06650..fc6de0aad09 100644 --- a/src/intel/compiler/jay/jay_ir.h +++ b/src/intel/compiler/jay/jay_ir.h @@ -443,16 +443,19 @@ jay_abs(jay_def src) return src; } +static inline bool +jay_file_is_uniform(enum jay_file file) +{ + return file == UGPR || file == UFLAG || file == UACCUM || file == J_IMM; +} + /** * Returns true if the given source reads the same value in all lanes. */ static inline bool jay_is_uniform(jay_def d) { - return d.file == UGPR || - d.file == UFLAG || - d.file == UACCUM || - jay_is_imm(d); + return jay_file_is_uniform(d.file); } /** @@ -1077,8 +1080,8 @@ typedef struct jay_block { struct list_head instructions; /** Control flow graph */ - struct jay_block *successors[2]; - struct util_dynarray predecessors; + struct jay_block *logical_succs[2], *physical_succs[2]; + struct util_dynarray logical_preds, physical_preds; /** Index of the block in source order */ unsigned index; @@ -1112,7 +1115,8 @@ jay_new_block(jay_function *f) { jay_block *block = rzalloc(f, jay_block); - util_dynarray_init(&block->predecessors, block); + util_dynarray_init(&block->logical_preds, block); + util_dynarray_init(&block->physical_preds, block); list_inithead(&block->instructions); block->index = f->num_blocks++; @@ -1137,26 +1141,42 @@ jay_block_ending_jump(jay_block *block) return last && jay_op_is_control_flow(last->op) ? last : NULL; } -static inline unsigned -jay_num_predecessors(jay_block *block) +static inline struct util_dynarray * +jay_predecessors(jay_block *blk, enum jay_file file) { - return util_dynarray_num_elements(&block->predecessors, jay_block *); + return jay_file_is_uniform(file) ? &blk->physical_preds : + &blk->logical_preds; +} + +static inline jay_block ** +jay_successors(jay_block *blk, enum jay_file file) +{ + return jay_file_is_uniform(file) ? blk->physical_succs : blk->logical_succs; } static inline unsigned -jay_num_successors(jay_block *block) +jay_num_predecessors(jay_block *blk, enum jay_file file) { - static_assert(ARRAY_SIZE(block->successors) == 2); - return !!block->successors[0] + !!block->successors[1]; + return util_dynarray_num_elements(jay_predecessors(blk, file), jay_block *); +} + +static inline unsigned +jay_num_successors(jay_block *block, enum jay_file file) +{ + static_assert(ARRAY_SIZE(block->logical_succs) == 2); + static_assert(ARRAY_SIZE(block->physical_succs) == 2); + + return !!jay_successors(block, file)[0] + !!jay_successors(block, file)[1]; } static inline jay_block * -jay_first_predecessor(jay_block *block) +jay_first_predecessor(jay_block *block, enum jay_file file) { - if (jay_num_predecessors(block) == 0) + if (jay_num_predecessors(block, file) == 0) return NULL; - return *util_dynarray_element(&block->predecessors, struct jay_block *, 0); + return *util_dynarray_element(jay_predecessors(block, file), + struct jay_block *, 0); } /* Block worklist helpers */ @@ -1230,14 +1250,23 @@ jay_first_predecessor(jay_block *block) jay_foreach_function(s, func) \ jay_foreach_inst_in_func_safe(func, v_block, inst) -#define jay_foreach_successor(blk, v) \ - jay_block *v; \ - jay_block **_v; \ - for (_v = (jay_block **) &blk->successors[0], v = *_v; \ - v != NULL && _v < (jay_block **) &blk->successors[2]; _v++, v = *_v) +/* + * Get the next successor, using the fact that there are at most 2 successors + * and NULL successors cannot precede non-NULL successors. + */ +static inline jay_block * +jay_next_successor(jay_block *parent, enum jay_file file, jay_block *it) +{ + jay_block **succs = jay_successors(parent, file); + return succs[0] == it ? succs[1] : NULL; +} -#define jay_foreach_predecessor(blk, v) \ - util_dynarray_foreach(&blk->predecessors, jay_block *, v) +#define jay_foreach_successor(blk, v, file) \ + for (jay_block *v = jay_successors(blk, file)[0]; v != NULL; \ + v = jay_next_successor(blk, file, v)) + +#define jay_foreach_predecessor(blk, v, file) \ + util_dynarray_foreach(jay_predecessors(blk, file), jay_block *, v) #define jay_foreach_src(inst, s) for (unsigned s = 0; s < inst->num_srcs; ++s) @@ -1343,15 +1372,21 @@ jay_next_block(jay_block *block) } static inline void -jay_block_add_successor(jay_block *block, jay_block *succ) +jay_block_add_successor(jay_block *block, jay_block *succ, enum jay_file file) { - unsigned i = block->successors[0] ? 1 : 0; + jay_block **succs = jay_successors(block, file); + unsigned i = succs[0] ? 1 : 0; - assert(succ && block->successors[0] != succ && block->successors[1] != succ); - assert(block->successors[i] == NULL && "at most 2 successors"); + assert(succ && succs[0] != succ && succs[1] != succ); + assert(succs[i] == NULL && "at most 2 successors"); - block->successors[i] = succ; - util_dynarray_append(&(succ->predecessors), block); + succs[i] = succ; + util_dynarray_append(jay_predecessors(succ, file), block); + + /* All logical CFG edges are also physical CFG edges */ + if (file == GPR) { + jay_block_add_successor(block, succ, UGPR); + } } static inline unsigned diff --git a/src/intel/compiler/jay/jay_liveness.c b/src/intel/compiler/jay/jay_liveness.c index ebe89f7504f..50d1c359663 100644 --- a/src/intel/compiler/jay/jay_liveness.c +++ b/src/intel/compiler/jay/jay_liveness.c @@ -66,6 +66,7 @@ jay_compute_liveness(jay_function *f) ralloc_free(f->dead_defs); f->dead_defs = BITSET_RZALLOC(f, f->ssa_alloc); + BITSET_WORD *uniform = BITSET_CALLOC(f->ssa_alloc); jay_foreach_block(f, block) { u_sparse_bitset_free(&block->live_in); @@ -77,6 +78,14 @@ jay_compute_liveness(jay_function *f) jay_worklist_push_head(&worklist, block); } + jay_foreach_inst_in_func(f, _, I) { + jay_foreach_dst_index(I, dst, index) { + if (jay_is_uniform(dst)) { + BITSET_SET(uniform, index); + } + } + } + while (!u_worklist_is_empty(&worklist)) { /* Pop in reverse order since liveness is a backwards pass */ jay_block *block = jay_worklist_pop_head(&worklist); @@ -94,9 +103,20 @@ jay_compute_liveness(jay_function *f) /* Propagate block->live_in[] to the live_out[] of predecessors. Since * phis are split, they are handled naturally without special cases. */ - jay_foreach_predecessor(block, p) { - if (u_sparse_bitset_merge(&(*p)->live_out, &block->live_in)) { - jay_worklist_push_tail(&worklist, *p); + for (enum jay_file file = GPR; file <= UGPR; ++file) { + jay_foreach_predecessor(block, p, file) { + bool progress = false; + + U_SPARSE_BITSET_FOREACH_SET(&block->live_in, i) { + if ((file == UGPR) == BITSET_TEST(uniform, i)) { + progress |= !u_sparse_bitset_test(&(*p)->live_out, i); + u_sparse_bitset_set(&(*p)->live_out, i); + } + } + + if (progress) { + jay_worklist_push_tail(&worklist, *p); + } } } } @@ -110,6 +130,7 @@ jay_compute_liveness(jay_function *f) #endif u_worklist_fini(&worklist); + free(uniform); } /* diff --git a/src/intel/compiler/jay/jay_lower_spill.c b/src/intel/compiler/jay/jay_lower_spill.c index 21fbac1777e..362b478afcc 100644 --- a/src/intel/compiler/jay/jay_lower_spill.c +++ b/src/intel/compiler/jay/jay_lower_spill.c @@ -138,7 +138,7 @@ jay_lower_spill(jay_function *func) } /* Canonicalize our internal registers at block boundaries */ - if (jay_num_successors(block) > 0) { + if (jay_num_successors(block, GPR) > 0) { if (!address_valid) { jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4); } diff --git a/src/intel/compiler/jay/jay_opt_control_flow.c b/src/intel/compiler/jay/jay_opt_control_flow.c index 1f337f37296..2940d9b886a 100644 --- a/src/intel/compiler/jay/jay_opt_control_flow.c +++ b/src/intel/compiler/jay/jay_opt_control_flow.c @@ -44,11 +44,11 @@ opt_predicate(jay_function *f, jay_block *block) /* If's fallthrough to the then */ jay_block *then_block = jay_next_block(block); - assert(block->successors[0] == then_block && "successors for if"); + assert(block->logical_succs[0] == then_block && "successors for if"); /* We're searching for a single block then, so the next block is else */ jay_block *else_block = jay_next_block(then_block); - if (block->successors[1] != else_block || + if (block->logical_succs[1] != else_block || list_length(&then_block->instructions) > 3 || !list_is_singular(&else_block->instructions)) return; diff --git a/src/intel/compiler/jay/jay_print.c b/src/intel/compiler/jay/jay_print.c index 64fb2f17382..1a885214178 100644 --- a/src/intel/compiler/jay/jay_print.c +++ b/src/intel/compiler/jay/jay_print.c @@ -249,10 +249,15 @@ jay_print_block(FILE *fp, jay_block *block) fprintf(fp, "B%d%s%s", block->index, block->uniform ? " [uniform]" : "", block->loop_header ? " [loop header]" : ""); bool first = true; - jay_foreach_predecessor(block, p) { + jay_foreach_predecessor(block, p, GPR) { fprintf(fp, "%s B%d", first ? " <-" : "", (*p)->index); first = false; } + first = true; + jay_foreach_predecessor(block, p, UGPR) { + fprintf(fp, "%s B%d", first ? " <=" : "", (*p)->index); + first = false; + } fprintf(fp, " {\n"); /* We group phi destinations/sources for legibility */ @@ -281,11 +286,14 @@ jay_print_block(FILE *fp, jay_block *block) indent(fp, block, false); fprintf(fp, "}"); first = true; - jay_foreach_successor(block, succ) { - if (succ) { - fprintf(fp, "%s B%d", first ? " ->" : "", succ->index); - first = false; - } + jay_foreach_successor(block, succ, GPR) { + fprintf(fp, "%s B%d", first ? " ->" : "", succ->index); + first = false; + } + first = true; + jay_foreach_successor(block, succ, UGPR) { + fprintf(fp, "%s B%d", first ? " =>" : "", succ->index); + first = false; } fprintf(fp, "\n\n"); } diff --git a/src/intel/compiler/jay/jay_repair_ssa.c b/src/intel/compiler/jay/jay_repair_ssa.c index 794f3977cdf..3f210e95a0c 100644 --- a/src/intel/compiler/jay/jay_repair_ssa.c +++ b/src/intel/compiler/jay/jay_repair_ssa.c @@ -66,7 +66,8 @@ static bool try_remove_trivial_phi(struct ctx *ctx, struct phi *phi) { unsigned same = 0; - for (unsigned i = 0; i < jay_num_predecessors(phi->block); ++i) { + for (unsigned i = 0; i < jay_num_predecessors(phi->block, phi->old.file); + ++i) { unsigned src = remap_idx(ctx, phi->src[i]); if (same && src != same && src != phi->dst) { /* Nontrivial */ @@ -86,9 +87,9 @@ try_remove_trivial_phi(struct ctx *ctx, struct phi *phi) static void add_phi(struct ctx *ctx, jay_block *block, jay_def src, unsigned dst) { - unsigned i = 0, n = jay_num_predecessors(block); + unsigned i = 0, n = jay_num_predecessors(block, src.file); unsigned *srcs = linear_alloc_array(ctx->linctx, unsigned, n); - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, src.file) { assert(i < n); srcs[i++] = lookup(ctx, *pred, src); } @@ -110,9 +111,10 @@ lookup(struct ctx *ctx, jay_block *block, jay_def def) } /* For a single predecessor, we can recurse without adding a phi. */ - bool insert_phi = jay_num_predecessors(block) > 1; - unsigned val = insert_phi ? ctx->alloc++ : - lookup(ctx, jay_first_predecessor(block), def); + bool insert_phi = jay_num_predecessors(block, def.file) > 1; + unsigned val = insert_phi ? + ctx->alloc++ : + lookup(ctx, jay_first_predecessor(block, def.file), def); _mesa_hash_table_u64_insert(ctx->defs[block->index], jay_index(def), (void *) (uintptr_t) val); @@ -190,7 +192,7 @@ jay_repair_ssa(jay_function *func) } /* Seal loop headers after processing the back edge */ - jay_foreach_successor(block, succ) { + jay_foreach_successor(block, succ, GPR) { if (succ->loop_header && succ->index <= block->index) { util_dynarray_foreach(&ctx.incomplete_phis[succ->index], struct incomplete_phi, el) { @@ -235,7 +237,7 @@ jay_repair_ssa(jay_function *func) jay_PHI_DST(&b, jay_scalar(phi->old.file, phi->dst)); unsigned i = 0; - jay_foreach_predecessor(phi->block, pred) { + jay_foreach_predecessor(phi->block, pred, phi->old.file) { b.cursor = jay_before_jump(*pred); unsigned idx = remap_idx(&ctx, phi->src[i++]); jay_PHI_SRC_u32(&b, jay_scalar(phi->old.file, idx), phi->dst); diff --git a/src/intel/compiler/jay/jay_spill.c b/src/intel/compiler/jay/jay_spill.c index 0cf92d30f00..290c969e1ef 100644 --- a/src/intel/compiler/jay/jay_spill.c +++ b/src/intel/compiler/jay/jay_spill.c @@ -352,7 +352,7 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ) /* Anything assumed to be spilled in succ must be spilled along all edges. */ U_SPARSE_BITSET_FOREACH_SET(&ss->S_in, v) { if (!u_sparse_bitset_test(&sp->S_out, v)) { - b.cursor = jay_along_edge(pred, succ); + b.cursor = jay_along_edge(pred, succ, GPR /* XXX */); insert_spill(&b, ctx, v); } } @@ -372,8 +372,9 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ) !u_sparse_bitset_test(&sp->W_out, src)) { /* Fill the phi source in the predecessor */ - jay_block *reload_block = jay_edge_to_block(pred, succ); - insert_reload(ctx, reload_block, jay_along_edge(pred, succ), src); + jay_block *reload_block = jay_edge_to_block(pred, succ, ctx->file); + insert_reload(ctx, reload_block, jay_along_edge(pred, succ, ctx->file), + src); } } @@ -385,8 +386,9 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ) if (!u_sparse_bitset_test(&sp->W_out, v) && !u_sparse_bitset_test(&ctx->phi_set, v)) { - jay_block *reload_block = jay_edge_to_block(pred, succ); - insert_reload(ctx, reload_block, jay_along_edge(pred, succ), v); + jay_block *reload_block = jay_edge_to_block(pred, succ, GPR /* XXX */); + insert_reload(ctx, reload_block, + jay_along_edge(pred, succ, GPR /* XXX */), v); } } } @@ -598,7 +600,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block) U_SPARSE_BITSET_FOREACH_SET(&ctx->N, i) { bool all = true, any = false; - jay_foreach_predecessor(block, P) { + jay_foreach_predecessor(block, P, ctx->file) { bool in = u_sparse_bitset_test(&ctx->blocks[(*P)->index].W_out, i); all &= in; any |= in; @@ -612,7 +614,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block) } } - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, ctx->file) { jay_foreach_phi_src_in_block(*pred, I) { if (!u_sparse_bitset_test(&ctx->blocks[(*pred)->index].W_out, jay_index(I->src[0]))) { @@ -655,7 +657,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block) static ATTRIBUTE_NOINLINE void compute_s_entry(struct spill_ctx *ctx, jay_block *block) { - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, ctx->file) { U_SPARSE_BITSET_FOREACH_SET(&ctx->blocks[(*pred)->index].S_out, v) { if (u_sparse_bitset_test(&block->live_in, v)) { u_sparse_bitset_set(&ctx->S, v); @@ -731,7 +733,7 @@ global_next_use_distances(struct spill_ctx *ctx, void *memctx) } /* Propagate successor live-in to pred live-out, joining with min */ - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, ctx->file) { if (minimum_next_uses(&ctx->blocks[(*pred)->index].next_use_out, &sb->next_use_in, ctx->next_uses, &ctx->phi_set)) { @@ -838,7 +840,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k) if (block->loop_header) { compute_w_entry_loop_header(&ctx, block); - } else if (jay_num_predecessors(block) /* skip start blocks */) { + } else if (jay_num_predecessors(block, file) /* skip start blocks */) { compute_w_entry(&ctx, block); } @@ -851,7 +853,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k) /* Now that all blocks are processed separately, stitch it together */ jay_foreach_block(func, block) { - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, file) { u_sparse_bitset_clear_all(&ctx.phi_set); insert_coupling_code(&ctx, *pred, block); } diff --git a/src/intel/compiler/jay/jay_validate.c b/src/intel/compiler/jay/jay_validate.c index 78adb2b232c..ff3c07e1316 100644 --- a/src/intel/compiler/jay/jay_validate.c +++ b/src/intel/compiler/jay/jay_validate.c @@ -264,21 +264,30 @@ jay_validate_function(struct validate_state *validate) validate->block = block; validate->I = NULL; - CHECK(block->successors[0] || !block->successors[1]); + CHECK(block->logical_succs[0] || !block->logical_succs[1]); /* Post-RA we can remove physical jumps though they exist logically */ - if (block->successors[1] && !validate->post_ra) { + if (block->logical_succs[1] && !validate->post_ra) { CHECK(jay_block_ending_jump(block) != NULL); } + bool uniform_phi = false; + jay_foreach_phi_src_in_block(block, phi) { + uniform_phi |= jay_is_uniform(phi->src[0]); + } + /* If a block has multiple successors, and one of them has multiple * predecessors, then we've detected a critical edge. */ - if (jay_num_successors(block) > 1 && !validate->post_ra) { - jay_foreach_successor(block, succ) { - if (jay_num_predecessors(succ) > 1) { - chirp(validate, "Critical edge (B%u -> B%u) is not allowed", - block->index, succ->index); + for (enum jay_file file = GPR; file <= (uniform_phi ? UGPR : GPR); + ++file) { + if (jay_num_successors(block, file) > 1 && !validate->post_ra) { + jay_foreach_successor(block, succ, file) { + if (jay_num_predecessors(succ, file) > 1) { + chirp(validate, "%s critical edge (B%u -> B%u)", + file == GPR ? "Logical" : "Physical", block->index, + succ->index); + } } } } diff --git a/src/intel/compiler/jay/jay_validate_ra.c b/src/intel/compiler/jay/jay_validate_ra.c index 99fd6333366..01de82566cf 100644 --- a/src/intel/compiler/jay/jay_validate_ra.c +++ b/src/intel/compiler/jay/jay_validate_ra.c @@ -102,15 +102,15 @@ validate_block(jay_function *func, jay_block *block, struct regfile *blocks) * * dEQP-VK.graphicsfuzz.spv-stable-mergesort-dead-code */ - bool loop_header = block->loop_header && jay_num_predecessors(block) > 1; + bool loop_header = + block->loop_header && jay_num_predecessors(block, GPR) > 1; /* Initialize the register file based on predecessors. */ /* Initialize with the exit state of any one predecessor */ - jay_block *first_pred = jay_first_predecessor(block); - if (first_pred) { - struct regfile *pred_rf = &blocks[first_pred->index]; - - jay_foreach_ssa_file(f) { + jay_foreach_ssa_file(f) { + jay_block *first_pred = jay_first_predecessor(block, f); + if (first_pred) { + struct regfile *pred_rf = &blocks[first_pred->index]; memcpy(rf->r[f], pred_rf->r[f], rf->n[f] * sizeof(uint32_t)); } } @@ -121,10 +121,10 @@ validate_block(jay_function *func, jay_block *block, struct regfile *blocks) * values coming in from each block, it is considered undefined at the * start of the block. */ - jay_foreach_predecessor(block, pred) { - struct regfile *pred_rf = &blocks[(*pred)->index]; + jay_foreach_ssa_file(file) { + jay_foreach_predecessor(block, pred, file) { + struct regfile *pred_rf = &blocks[(*pred)->index]; - jay_foreach_ssa_file(file) { for (unsigned r = 0; r < rf->n[file]; ++r) { if (*reg(rf, file, r) != *reg(pred_rf, file, r)) { *reg(rf, file, r) = 0; diff --git a/src/intel/compiler/jay/test/test-repair-ssa.cpp b/src/intel/compiler/jay/test/test-repair-ssa.cpp index 8d117746eee..96c1c110887 100644 --- a/src/intel/compiler/jay/test/test-repair-ssa.cpp +++ b/src/intel/compiler/jay/test/test-repair-ssa.cpp @@ -90,11 +90,11 @@ TEST_F(RepairSSA, IfElse) jay_block *C = jay_test_block(b->func); jay_block *D = jay_test_block(b->func); - jay_block_add_successor(A, B); - jay_block_add_successor(A, C); + jay_block_add_successor(A, B, UGPR); + jay_block_add_successor(A, C, UGPR); - jay_block_add_successor(B, D); - jay_block_add_successor(C, D); + jay_block_add_successor(B, D, UGPR); + jay_block_add_successor(C, D, UGPR); b->cursor = jay_after_block(A); jay_IF(b); @@ -140,12 +140,12 @@ TEST_F(RepairSSA, Loop) jay_block *D = jay_test_block(b->func); jay_block *E = jay_test_block(b->func); - jay_block_add_successor(H, A); - jay_block_add_successor(A, B); - jay_block_add_successor(A, C); - jay_block_add_successor(B, E); - jay_block_add_successor(C, D); - jay_block_add_successor(D, A); + jay_block_add_successor(H, A, GPR); + jay_block_add_successor(A, B, GPR); + jay_block_add_successor(A, C, GPR); + jay_block_add_successor(B, E, GPR); + jay_block_add_successor(C, D, GPR); + jay_block_add_successor(D, A, GPR); A->loop_header = true; @@ -189,11 +189,11 @@ TEST_F(RepairSSA, TrivialPhisOptimized) jay_block *C = jay_test_block(b->func); jay_block *D = jay_test_block(b->func); - jay_block_add_successor(A, B); - jay_block_add_successor(A, C); + jay_block_add_successor(A, B, UGPR); + jay_block_add_successor(A, C, UGPR); - jay_block_add_successor(B, D); - jay_block_add_successor(C, D); + jay_block_add_successor(B, D, UGPR); + jay_block_add_successor(C, D, UGPR); b->cursor = jay_after_block(A); jay_def x = jay_MOV_u32(b, 0xcafe);