diff --git a/src/intel/compiler/jay/jay_builder.h b/src/intel/compiler/jay/jay_builder.h index b0e401fc7a3..2f4f768dd25 100644 --- a/src/intel/compiler/jay/jay_builder.h +++ b/src/intel/compiler/jay/jay_builder.h @@ -106,10 +106,12 @@ jay_before_function(jay_function *f) * have other predecessorss since there are no critical edges. */ static inline jay_block * -jay_edge_to_block(jay_block *pred, jay_block *succ) +jay_edge_to_block(jay_block *pred, jay_block *succ, enum jay_file file) { - assert(jay_num_successors(pred) == 1 || jay_num_predecessors(succ) == 1); - return jay_num_successors(pred) == 1 ? pred : succ; + assert(jay_num_successors(pred, file) == 1 || + jay_num_predecessors(succ, file) == 1); + + return jay_num_successors(pred, file) == 1 ? pred : succ; } /* @@ -118,9 +120,9 @@ jay_edge_to_block(jay_block *pred, jay_block *succ) * flow graph having no critical edges. */ static inline jay_cursor -jay_along_edge(jay_block *pred, jay_block *succ) +jay_along_edge(jay_block *pred, jay_block *succ, enum jay_file file) { - jay_block *to = jay_edge_to_block(pred, succ); + jay_block *to = jay_edge_to_block(pred, succ, file); if (to == pred) return jay_after_block_logical(pred); diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index 9c6d773f98b..c1fa3d1990c 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -2042,7 +2042,7 @@ jay_emit_jump(struct nir_to_jay_state *nj, nir_jump_instr *instr) { switch (instr->type) { case nir_jump_break: - jay_block_add_successor(nj->current_block, nj->break_block); + jay_block_add_successor(nj->current_block, nj->break_block, GPR); jay_BREAK(&nj->bld); break; case nir_jump_halt: @@ -2144,14 +2144,22 @@ jay_emit_if(struct nir_to_jay_state *nj, nir_if *nif) /* Pop */ --nj->indent; - jay_block_add_successor(before_block, then_first); - jay_block_add_successor(before_block, else_first); + bool uniform = jay_is_uniform(condition); + + /* Logical CFG edges */ + jay_block_add_successor(before_block, then_first, GPR); + jay_block_add_successor(before_block, else_first, GPR); if (!jay_block_ending_unconditional_jump(then_last)) - jay_block_add_successor(then_last, after_block); + jay_block_add_successor(then_last, after_block, GPR); if (!jay_block_ending_unconditional_jump(else_last)) - jay_block_add_successor(else_last, after_block); + jay_block_add_successor(else_last, after_block, GPR); + + /* For a non-uniform IF, we fall through both sides in the physical CFG */ + if (!uniform) { + jay_block_add_successor(then_last, else_first, UGPR); + } nj->after_block = after_block; @@ -2183,7 +2191,7 @@ jay_emit_loop(struct nir_to_jay_state *nj, nir_loop *nloop) loop_header->loop_header = true; /* The current block falls through to the start of the loop */ - jay_block_add_successor(nj->current_block, loop_header); + jay_block_add_successor(nj->current_block, loop_header, GPR); /* Emit the loop body */ nj->after_block = loop_header; @@ -2194,7 +2202,7 @@ jay_emit_loop(struct nir_to_jay_state *nj, nir_loop *nloop) if (jump && jump->op == JAY_OPCODE_BREAK) { jump->op = JAY_OPCODE_LOOP_ONCE; } else { - jay_block_add_successor(nj->current_block, loop_header); + jay_block_add_successor(nj->current_block, loop_header, GPR); jay_WHILE(b); } @@ -2508,9 +2516,9 @@ jay_setup_payload(struct nir_to_jay_state *nj) } /* - * NIR sometimes contains unreachable blocks (e.g. due to infinite loops). These - * blocks have no predecessors, but do have successors and can contribute to - * phis. They are dead and violate the IR invariant: + * NIR sometimes contains logically unreachable blocks (e.g. due to infinite + * loops). These blocks have no predecessors, but do have successors and can + * contribute to phis. They are dead and violate the IR invariant: * * Live-in sources are live-out in all predecessors. * @@ -2530,16 +2538,23 @@ jay_remove_unreachable_blocks(jay_function *func) jay_foreach_block(func, pred) { if (pred != jay_first_block(func) && - jay_num_predecessors(pred) == 0 && - jay_num_successors(pred) > 0) { + jay_num_predecessors(pred, GPR) == 0 && + jay_num_successors(pred, GPR) > 0) { - jay_foreach_successor(pred, succ) { - util_dynarray_delete_unordered(&succ->predecessors, jay_block *, + jay_foreach_successor(pred, succ, GPR) { + util_dynarray_delete_unordered(&succ->logical_preds, jay_block *, pred); } - pred->successors[0] = NULL; - pred->successors[1] = NULL; + jay_foreach_successor(pred, succ, UGPR) { + util_dynarray_delete_unordered(&succ->physical_preds, + jay_block *, pred); + } + + pred->logical_succs[0] = NULL; + pred->logical_succs[1] = NULL; + pred->physical_succs[0] = NULL; + pred->physical_succs[1] = NULL; progress = true; } } diff --git a/src/intel/compiler/jay/jay_insert_fp_mode.c b/src/intel/compiler/jay/jay_insert_fp_mode.c index a51c974ad3f..fde92477f44 100644 --- a/src/intel/compiler/jay/jay_insert_fp_mode.c +++ b/src/intel/compiler/jay/jay_insert_fp_mode.c @@ -79,7 +79,7 @@ jay_insert_fp_mode(jay_shader *shader, uint32_t api, uint32_t float_sizes) } /* Restore to global state on block boundaries */ - if (jay_num_successors(block) > 0) { + if (jay_num_successors(block, GPR) > 0) { set_cr0(func, jay_after_block(block), ¤t, cr0); } } diff --git a/src/intel/compiler/jay/jay_ir.h b/src/intel/compiler/jay/jay_ir.h index 524e4b06650..fc6de0aad09 100644 --- a/src/intel/compiler/jay/jay_ir.h +++ b/src/intel/compiler/jay/jay_ir.h @@ -443,16 +443,19 @@ jay_abs(jay_def src) return src; } +static inline bool +jay_file_is_uniform(enum jay_file file) +{ + return file == UGPR || file == UFLAG || file == UACCUM || file == J_IMM; +} + /** * Returns true if the given source reads the same value in all lanes. */ static inline bool jay_is_uniform(jay_def d) { - return d.file == UGPR || - d.file == UFLAG || - d.file == UACCUM || - jay_is_imm(d); + return jay_file_is_uniform(d.file); } /** @@ -1077,8 +1080,8 @@ typedef struct jay_block { struct list_head instructions; /** Control flow graph */ - struct jay_block *successors[2]; - struct util_dynarray predecessors; + struct jay_block *logical_succs[2], *physical_succs[2]; + struct util_dynarray logical_preds, physical_preds; /** Index of the block in source order */ unsigned index; @@ -1112,7 +1115,8 @@ jay_new_block(jay_function *f) { jay_block *block = rzalloc(f, jay_block); - util_dynarray_init(&block->predecessors, block); + util_dynarray_init(&block->logical_preds, block); + util_dynarray_init(&block->physical_preds, block); list_inithead(&block->instructions); block->index = f->num_blocks++; @@ -1137,26 +1141,42 @@ jay_block_ending_jump(jay_block *block) return last && jay_op_is_control_flow(last->op) ? last : NULL; } -static inline unsigned -jay_num_predecessors(jay_block *block) +static inline struct util_dynarray * +jay_predecessors(jay_block *blk, enum jay_file file) { - return util_dynarray_num_elements(&block->predecessors, jay_block *); + return jay_file_is_uniform(file) ? &blk->physical_preds : + &blk->logical_preds; +} + +static inline jay_block ** +jay_successors(jay_block *blk, enum jay_file file) +{ + return jay_file_is_uniform(file) ? blk->physical_succs : blk->logical_succs; } static inline unsigned -jay_num_successors(jay_block *block) +jay_num_predecessors(jay_block *blk, enum jay_file file) { - static_assert(ARRAY_SIZE(block->successors) == 2); - return !!block->successors[0] + !!block->successors[1]; + return util_dynarray_num_elements(jay_predecessors(blk, file), jay_block *); +} + +static inline unsigned +jay_num_successors(jay_block *block, enum jay_file file) +{ + static_assert(ARRAY_SIZE(block->logical_succs) == 2); + static_assert(ARRAY_SIZE(block->physical_succs) == 2); + + return !!jay_successors(block, file)[0] + !!jay_successors(block, file)[1]; } static inline jay_block * -jay_first_predecessor(jay_block *block) +jay_first_predecessor(jay_block *block, enum jay_file file) { - if (jay_num_predecessors(block) == 0) + if (jay_num_predecessors(block, file) == 0) return NULL; - return *util_dynarray_element(&block->predecessors, struct jay_block *, 0); + return *util_dynarray_element(jay_predecessors(block, file), + struct jay_block *, 0); } /* Block worklist helpers */ @@ -1230,14 +1250,23 @@ jay_first_predecessor(jay_block *block) jay_foreach_function(s, func) \ jay_foreach_inst_in_func_safe(func, v_block, inst) -#define jay_foreach_successor(blk, v) \ - jay_block *v; \ - jay_block **_v; \ - for (_v = (jay_block **) &blk->successors[0], v = *_v; \ - v != NULL && _v < (jay_block **) &blk->successors[2]; _v++, v = *_v) +/* + * Get the next successor, using the fact that there are at most 2 successors + * and NULL successors cannot precede non-NULL successors. + */ +static inline jay_block * +jay_next_successor(jay_block *parent, enum jay_file file, jay_block *it) +{ + jay_block **succs = jay_successors(parent, file); + return succs[0] == it ? succs[1] : NULL; +} -#define jay_foreach_predecessor(blk, v) \ - util_dynarray_foreach(&blk->predecessors, jay_block *, v) +#define jay_foreach_successor(blk, v, file) \ + for (jay_block *v = jay_successors(blk, file)[0]; v != NULL; \ + v = jay_next_successor(blk, file, v)) + +#define jay_foreach_predecessor(blk, v, file) \ + util_dynarray_foreach(jay_predecessors(blk, file), jay_block *, v) #define jay_foreach_src(inst, s) for (unsigned s = 0; s < inst->num_srcs; ++s) @@ -1343,15 +1372,21 @@ jay_next_block(jay_block *block) } static inline void -jay_block_add_successor(jay_block *block, jay_block *succ) +jay_block_add_successor(jay_block *block, jay_block *succ, enum jay_file file) { - unsigned i = block->successors[0] ? 1 : 0; + jay_block **succs = jay_successors(block, file); + unsigned i = succs[0] ? 1 : 0; - assert(succ && block->successors[0] != succ && block->successors[1] != succ); - assert(block->successors[i] == NULL && "at most 2 successors"); + assert(succ && succs[0] != succ && succs[1] != succ); + assert(succs[i] == NULL && "at most 2 successors"); - block->successors[i] = succ; - util_dynarray_append(&(succ->predecessors), block); + succs[i] = succ; + util_dynarray_append(jay_predecessors(succ, file), block); + + /* All logical CFG edges are also physical CFG edges */ + if (file == GPR) { + jay_block_add_successor(block, succ, UGPR); + } } static inline unsigned diff --git a/src/intel/compiler/jay/jay_liveness.c b/src/intel/compiler/jay/jay_liveness.c index ebe89f7504f..50d1c359663 100644 --- a/src/intel/compiler/jay/jay_liveness.c +++ b/src/intel/compiler/jay/jay_liveness.c @@ -66,6 +66,7 @@ jay_compute_liveness(jay_function *f) ralloc_free(f->dead_defs); f->dead_defs = BITSET_RZALLOC(f, f->ssa_alloc); + BITSET_WORD *uniform = BITSET_CALLOC(f->ssa_alloc); jay_foreach_block(f, block) { u_sparse_bitset_free(&block->live_in); @@ -77,6 +78,14 @@ jay_compute_liveness(jay_function *f) jay_worklist_push_head(&worklist, block); } + jay_foreach_inst_in_func(f, _, I) { + jay_foreach_dst_index(I, dst, index) { + if (jay_is_uniform(dst)) { + BITSET_SET(uniform, index); + } + } + } + while (!u_worklist_is_empty(&worklist)) { /* Pop in reverse order since liveness is a backwards pass */ jay_block *block = jay_worklist_pop_head(&worklist); @@ -94,9 +103,20 @@ jay_compute_liveness(jay_function *f) /* Propagate block->live_in[] to the live_out[] of predecessors. Since * phis are split, they are handled naturally without special cases. */ - jay_foreach_predecessor(block, p) { - if (u_sparse_bitset_merge(&(*p)->live_out, &block->live_in)) { - jay_worklist_push_tail(&worklist, *p); + for (enum jay_file file = GPR; file <= UGPR; ++file) { + jay_foreach_predecessor(block, p, file) { + bool progress = false; + + U_SPARSE_BITSET_FOREACH_SET(&block->live_in, i) { + if ((file == UGPR) == BITSET_TEST(uniform, i)) { + progress |= !u_sparse_bitset_test(&(*p)->live_out, i); + u_sparse_bitset_set(&(*p)->live_out, i); + } + } + + if (progress) { + jay_worklist_push_tail(&worklist, *p); + } } } } @@ -110,6 +130,7 @@ jay_compute_liveness(jay_function *f) #endif u_worklist_fini(&worklist); + free(uniform); } /* diff --git a/src/intel/compiler/jay/jay_lower_spill.c b/src/intel/compiler/jay/jay_lower_spill.c index 21fbac1777e..362b478afcc 100644 --- a/src/intel/compiler/jay/jay_lower_spill.c +++ b/src/intel/compiler/jay/jay_lower_spill.c @@ -138,7 +138,7 @@ jay_lower_spill(jay_function *func) } /* Canonicalize our internal registers at block boundaries */ - if (jay_num_successors(block) > 0) { + if (jay_num_successors(block, GPR) > 0) { if (!address_valid) { jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4); } diff --git a/src/intel/compiler/jay/jay_opt_control_flow.c b/src/intel/compiler/jay/jay_opt_control_flow.c index 1f337f37296..2940d9b886a 100644 --- a/src/intel/compiler/jay/jay_opt_control_flow.c +++ b/src/intel/compiler/jay/jay_opt_control_flow.c @@ -44,11 +44,11 @@ opt_predicate(jay_function *f, jay_block *block) /* If's fallthrough to the then */ jay_block *then_block = jay_next_block(block); - assert(block->successors[0] == then_block && "successors for if"); + assert(block->logical_succs[0] == then_block && "successors for if"); /* We're searching for a single block then, so the next block is else */ jay_block *else_block = jay_next_block(then_block); - if (block->successors[1] != else_block || + if (block->logical_succs[1] != else_block || list_length(&then_block->instructions) > 3 || !list_is_singular(&else_block->instructions)) return; diff --git a/src/intel/compiler/jay/jay_print.c b/src/intel/compiler/jay/jay_print.c index 64fb2f17382..1a885214178 100644 --- a/src/intel/compiler/jay/jay_print.c +++ b/src/intel/compiler/jay/jay_print.c @@ -249,10 +249,15 @@ jay_print_block(FILE *fp, jay_block *block) fprintf(fp, "B%d%s%s", block->index, block->uniform ? " [uniform]" : "", block->loop_header ? " [loop header]" : ""); bool first = true; - jay_foreach_predecessor(block, p) { + jay_foreach_predecessor(block, p, GPR) { fprintf(fp, "%s B%d", first ? " <-" : "", (*p)->index); first = false; } + first = true; + jay_foreach_predecessor(block, p, UGPR) { + fprintf(fp, "%s B%d", first ? " <=" : "", (*p)->index); + first = false; + } fprintf(fp, " {\n"); /* We group phi destinations/sources for legibility */ @@ -281,11 +286,14 @@ jay_print_block(FILE *fp, jay_block *block) indent(fp, block, false); fprintf(fp, "}"); first = true; - jay_foreach_successor(block, succ) { - if (succ) { - fprintf(fp, "%s B%d", first ? " ->" : "", succ->index); - first = false; - } + jay_foreach_successor(block, succ, GPR) { + fprintf(fp, "%s B%d", first ? " ->" : "", succ->index); + first = false; + } + first = true; + jay_foreach_successor(block, succ, UGPR) { + fprintf(fp, "%s B%d", first ? " =>" : "", succ->index); + first = false; } fprintf(fp, "\n\n"); } diff --git a/src/intel/compiler/jay/jay_repair_ssa.c b/src/intel/compiler/jay/jay_repair_ssa.c index 794f3977cdf..3f210e95a0c 100644 --- a/src/intel/compiler/jay/jay_repair_ssa.c +++ b/src/intel/compiler/jay/jay_repair_ssa.c @@ -66,7 +66,8 @@ static bool try_remove_trivial_phi(struct ctx *ctx, struct phi *phi) { unsigned same = 0; - for (unsigned i = 0; i < jay_num_predecessors(phi->block); ++i) { + for (unsigned i = 0; i < jay_num_predecessors(phi->block, phi->old.file); + ++i) { unsigned src = remap_idx(ctx, phi->src[i]); if (same && src != same && src != phi->dst) { /* Nontrivial */ @@ -86,9 +87,9 @@ try_remove_trivial_phi(struct ctx *ctx, struct phi *phi) static void add_phi(struct ctx *ctx, jay_block *block, jay_def src, unsigned dst) { - unsigned i = 0, n = jay_num_predecessors(block); + unsigned i = 0, n = jay_num_predecessors(block, src.file); unsigned *srcs = linear_alloc_array(ctx->linctx, unsigned, n); - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, src.file) { assert(i < n); srcs[i++] = lookup(ctx, *pred, src); } @@ -110,9 +111,10 @@ lookup(struct ctx *ctx, jay_block *block, jay_def def) } /* For a single predecessor, we can recurse without adding a phi. */ - bool insert_phi = jay_num_predecessors(block) > 1; - unsigned val = insert_phi ? ctx->alloc++ : - lookup(ctx, jay_first_predecessor(block), def); + bool insert_phi = jay_num_predecessors(block, def.file) > 1; + unsigned val = insert_phi ? + ctx->alloc++ : + lookup(ctx, jay_first_predecessor(block, def.file), def); _mesa_hash_table_u64_insert(ctx->defs[block->index], jay_index(def), (void *) (uintptr_t) val); @@ -190,7 +192,7 @@ jay_repair_ssa(jay_function *func) } /* Seal loop headers after processing the back edge */ - jay_foreach_successor(block, succ) { + jay_foreach_successor(block, succ, GPR) { if (succ->loop_header && succ->index <= block->index) { util_dynarray_foreach(&ctx.incomplete_phis[succ->index], struct incomplete_phi, el) { @@ -235,7 +237,7 @@ jay_repair_ssa(jay_function *func) jay_PHI_DST(&b, jay_scalar(phi->old.file, phi->dst)); unsigned i = 0; - jay_foreach_predecessor(phi->block, pred) { + jay_foreach_predecessor(phi->block, pred, phi->old.file) { b.cursor = jay_before_jump(*pred); unsigned idx = remap_idx(&ctx, phi->src[i++]); jay_PHI_SRC_u32(&b, jay_scalar(phi->old.file, idx), phi->dst); diff --git a/src/intel/compiler/jay/jay_spill.c b/src/intel/compiler/jay/jay_spill.c index 0cf92d30f00..290c969e1ef 100644 --- a/src/intel/compiler/jay/jay_spill.c +++ b/src/intel/compiler/jay/jay_spill.c @@ -352,7 +352,7 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ) /* Anything assumed to be spilled in succ must be spilled along all edges. */ U_SPARSE_BITSET_FOREACH_SET(&ss->S_in, v) { if (!u_sparse_bitset_test(&sp->S_out, v)) { - b.cursor = jay_along_edge(pred, succ); + b.cursor = jay_along_edge(pred, succ, GPR /* XXX */); insert_spill(&b, ctx, v); } } @@ -372,8 +372,9 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ) !u_sparse_bitset_test(&sp->W_out, src)) { /* Fill the phi source in the predecessor */ - jay_block *reload_block = jay_edge_to_block(pred, succ); - insert_reload(ctx, reload_block, jay_along_edge(pred, succ), src); + jay_block *reload_block = jay_edge_to_block(pred, succ, ctx->file); + insert_reload(ctx, reload_block, jay_along_edge(pred, succ, ctx->file), + src); } } @@ -385,8 +386,9 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ) if (!u_sparse_bitset_test(&sp->W_out, v) && !u_sparse_bitset_test(&ctx->phi_set, v)) { - jay_block *reload_block = jay_edge_to_block(pred, succ); - insert_reload(ctx, reload_block, jay_along_edge(pred, succ), v); + jay_block *reload_block = jay_edge_to_block(pred, succ, GPR /* XXX */); + insert_reload(ctx, reload_block, + jay_along_edge(pred, succ, GPR /* XXX */), v); } } } @@ -598,7 +600,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block) U_SPARSE_BITSET_FOREACH_SET(&ctx->N, i) { bool all = true, any = false; - jay_foreach_predecessor(block, P) { + jay_foreach_predecessor(block, P, ctx->file) { bool in = u_sparse_bitset_test(&ctx->blocks[(*P)->index].W_out, i); all &= in; any |= in; @@ -612,7 +614,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block) } } - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, ctx->file) { jay_foreach_phi_src_in_block(*pred, I) { if (!u_sparse_bitset_test(&ctx->blocks[(*pred)->index].W_out, jay_index(I->src[0]))) { @@ -655,7 +657,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block) static ATTRIBUTE_NOINLINE void compute_s_entry(struct spill_ctx *ctx, jay_block *block) { - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, ctx->file) { U_SPARSE_BITSET_FOREACH_SET(&ctx->blocks[(*pred)->index].S_out, v) { if (u_sparse_bitset_test(&block->live_in, v)) { u_sparse_bitset_set(&ctx->S, v); @@ -731,7 +733,7 @@ global_next_use_distances(struct spill_ctx *ctx, void *memctx) } /* Propagate successor live-in to pred live-out, joining with min */ - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, ctx->file) { if (minimum_next_uses(&ctx->blocks[(*pred)->index].next_use_out, &sb->next_use_in, ctx->next_uses, &ctx->phi_set)) { @@ -838,7 +840,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k) if (block->loop_header) { compute_w_entry_loop_header(&ctx, block); - } else if (jay_num_predecessors(block) /* skip start blocks */) { + } else if (jay_num_predecessors(block, file) /* skip start blocks */) { compute_w_entry(&ctx, block); } @@ -851,7 +853,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k) /* Now that all blocks are processed separately, stitch it together */ jay_foreach_block(func, block) { - jay_foreach_predecessor(block, pred) { + jay_foreach_predecessor(block, pred, file) { u_sparse_bitset_clear_all(&ctx.phi_set); insert_coupling_code(&ctx, *pred, block); } diff --git a/src/intel/compiler/jay/jay_validate.c b/src/intel/compiler/jay/jay_validate.c index 78adb2b232c..ff3c07e1316 100644 --- a/src/intel/compiler/jay/jay_validate.c +++ b/src/intel/compiler/jay/jay_validate.c @@ -264,21 +264,30 @@ jay_validate_function(struct validate_state *validate) validate->block = block; validate->I = NULL; - CHECK(block->successors[0] || !block->successors[1]); + CHECK(block->logical_succs[0] || !block->logical_succs[1]); /* Post-RA we can remove physical jumps though they exist logically */ - if (block->successors[1] && !validate->post_ra) { + if (block->logical_succs[1] && !validate->post_ra) { CHECK(jay_block_ending_jump(block) != NULL); } + bool uniform_phi = false; + jay_foreach_phi_src_in_block(block, phi) { + uniform_phi |= jay_is_uniform(phi->src[0]); + } + /* If a block has multiple successors, and one of them has multiple * predecessors, then we've detected a critical edge. */ - if (jay_num_successors(block) > 1 && !validate->post_ra) { - jay_foreach_successor(block, succ) { - if (jay_num_predecessors(succ) > 1) { - chirp(validate, "Critical edge (B%u -> B%u) is not allowed", - block->index, succ->index); + for (enum jay_file file = GPR; file <= (uniform_phi ? UGPR : GPR); + ++file) { + if (jay_num_successors(block, file) > 1 && !validate->post_ra) { + jay_foreach_successor(block, succ, file) { + if (jay_num_predecessors(succ, file) > 1) { + chirp(validate, "%s critical edge (B%u -> B%u)", + file == GPR ? "Logical" : "Physical", block->index, + succ->index); + } } } } diff --git a/src/intel/compiler/jay/jay_validate_ra.c b/src/intel/compiler/jay/jay_validate_ra.c index 99fd6333366..01de82566cf 100644 --- a/src/intel/compiler/jay/jay_validate_ra.c +++ b/src/intel/compiler/jay/jay_validate_ra.c @@ -102,15 +102,15 @@ validate_block(jay_function *func, jay_block *block, struct regfile *blocks) * * dEQP-VK.graphicsfuzz.spv-stable-mergesort-dead-code */ - bool loop_header = block->loop_header && jay_num_predecessors(block) > 1; + bool loop_header = + block->loop_header && jay_num_predecessors(block, GPR) > 1; /* Initialize the register file based on predecessors. */ /* Initialize with the exit state of any one predecessor */ - jay_block *first_pred = jay_first_predecessor(block); - if (first_pred) { - struct regfile *pred_rf = &blocks[first_pred->index]; - - jay_foreach_ssa_file(f) { + jay_foreach_ssa_file(f) { + jay_block *first_pred = jay_first_predecessor(block, f); + if (first_pred) { + struct regfile *pred_rf = &blocks[first_pred->index]; memcpy(rf->r[f], pred_rf->r[f], rf->n[f] * sizeof(uint32_t)); } } @@ -121,10 +121,10 @@ validate_block(jay_function *func, jay_block *block, struct regfile *blocks) * values coming in from each block, it is considered undefined at the * start of the block. */ - jay_foreach_predecessor(block, pred) { - struct regfile *pred_rf = &blocks[(*pred)->index]; + jay_foreach_ssa_file(file) { + jay_foreach_predecessor(block, pred, file) { + struct regfile *pred_rf = &blocks[(*pred)->index]; - jay_foreach_ssa_file(file) { for (unsigned r = 0; r < rf->n[file]; ++r) { if (*reg(rf, file, r) != *reg(pred_rf, file, r)) { *reg(rf, file, r) = 0; diff --git a/src/intel/compiler/jay/test/test-repair-ssa.cpp b/src/intel/compiler/jay/test/test-repair-ssa.cpp index 8d117746eee..96c1c110887 100644 --- a/src/intel/compiler/jay/test/test-repair-ssa.cpp +++ b/src/intel/compiler/jay/test/test-repair-ssa.cpp @@ -90,11 +90,11 @@ TEST_F(RepairSSA, IfElse) jay_block *C = jay_test_block(b->func); jay_block *D = jay_test_block(b->func); - jay_block_add_successor(A, B); - jay_block_add_successor(A, C); + jay_block_add_successor(A, B, UGPR); + jay_block_add_successor(A, C, UGPR); - jay_block_add_successor(B, D); - jay_block_add_successor(C, D); + jay_block_add_successor(B, D, UGPR); + jay_block_add_successor(C, D, UGPR); b->cursor = jay_after_block(A); jay_IF(b); @@ -140,12 +140,12 @@ TEST_F(RepairSSA, Loop) jay_block *D = jay_test_block(b->func); jay_block *E = jay_test_block(b->func); - jay_block_add_successor(H, A); - jay_block_add_successor(A, B); - jay_block_add_successor(A, C); - jay_block_add_successor(B, E); - jay_block_add_successor(C, D); - jay_block_add_successor(D, A); + jay_block_add_successor(H, A, GPR); + jay_block_add_successor(A, B, GPR); + jay_block_add_successor(A, C, GPR); + jay_block_add_successor(B, E, GPR); + jay_block_add_successor(C, D, GPR); + jay_block_add_successor(D, A, GPR); A->loop_header = true; @@ -189,11 +189,11 @@ TEST_F(RepairSSA, TrivialPhisOptimized) jay_block *C = jay_test_block(b->func); jay_block *D = jay_test_block(b->func); - jay_block_add_successor(A, B); - jay_block_add_successor(A, C); + jay_block_add_successor(A, B, UGPR); + jay_block_add_successor(A, C, UGPR); - jay_block_add_successor(B, D); - jay_block_add_successor(C, D); + jay_block_add_successor(B, D, UGPR); + jay_block_add_successor(C, D, UGPR); b->cursor = jay_after_block(A); jay_def x = jay_MOV_u32(b, 0xcafe);