mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
jay: introduce a physical control flow graph
Consider:
u0 = foo()
if (divergent) {
u0 = bar()
r0 = baz(u0)
} else {
r0 = quux(u0)
}
Logically, this is fine, there is no interference between bar() and u0. But
physically, both sides of the if execute so the bar() write to u0 overwrites the
variable the else reads. So this is a miscompile.
The solution is to model the extra edges in the physical control flow graph,
which lives next to the existing logical control flow graph. Liveness for UGPRs
now follows the physical CFG, while liveness for GPRs continues to follow the
logical CFG. That models the interference properly, while still allowing phis to
work as before (since phis writing UGPRs follow uniform bits of control flow
that are necessarily critical edge free for the same reason the logical CFG is).
Because our RA copies shuffled registers back at block ends (following
Colombet), there's no issue with live range splits here (unlike aco which
inserts phis for this case and then needs to worry about critical edges around
those phis).
There might still be an extremely-challenging-to-hit bug here with UGPR spilling
which I need to think more about. It might be fine as-is? Not convinced though.
But this is big enough and strictly less broken than what we have right now and
the full solution will build on this, so here we are.
Fixes artefating in SuperTuxKart and Celestia knows what else.
Totals:
Instrs: 2770938 -> 2771269 (+0.01%); split: -0.00%, +0.02%
CodeSize: 40133712 -> 40138480 (+0.01%); split: -0.01%, +0.02%
Totals from 158 (5.97% of 2647) affected shaders:
Instrs: 514523 -> 514854 (+0.06%); split: -0.02%, +0.09%
CodeSize: 7603040 -> 7607808 (+0.06%); split: -0.03%, +0.09%
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41215>
This commit is contained in:
parent
fadb826515
commit
ad040f2fbb
13 changed files with 206 additions and 112 deletions
|
|
@ -106,10 +106,12 @@ jay_before_function(jay_function *f)
|
|||
* have other predecessorss since there are no critical edges.
|
||||
*/
|
||||
static inline jay_block *
|
||||
jay_edge_to_block(jay_block *pred, jay_block *succ)
|
||||
jay_edge_to_block(jay_block *pred, jay_block *succ, enum jay_file file)
|
||||
{
|
||||
assert(jay_num_successors(pred) == 1 || jay_num_predecessors(succ) == 1);
|
||||
return jay_num_successors(pred) == 1 ? pred : succ;
|
||||
assert(jay_num_successors(pred, file) == 1 ||
|
||||
jay_num_predecessors(succ, file) == 1);
|
||||
|
||||
return jay_num_successors(pred, file) == 1 ? pred : succ;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -118,9 +120,9 @@ jay_edge_to_block(jay_block *pred, jay_block *succ)
|
|||
* flow graph having no critical edges.
|
||||
*/
|
||||
static inline jay_cursor
|
||||
jay_along_edge(jay_block *pred, jay_block *succ)
|
||||
jay_along_edge(jay_block *pred, jay_block *succ, enum jay_file file)
|
||||
{
|
||||
jay_block *to = jay_edge_to_block(pred, succ);
|
||||
jay_block *to = jay_edge_to_block(pred, succ, file);
|
||||
|
||||
if (to == pred)
|
||||
return jay_after_block_logical(pred);
|
||||
|
|
|
|||
|
|
@ -2042,7 +2042,7 @@ jay_emit_jump(struct nir_to_jay_state *nj, nir_jump_instr *instr)
|
|||
{
|
||||
switch (instr->type) {
|
||||
case nir_jump_break:
|
||||
jay_block_add_successor(nj->current_block, nj->break_block);
|
||||
jay_block_add_successor(nj->current_block, nj->break_block, GPR);
|
||||
jay_BREAK(&nj->bld);
|
||||
break;
|
||||
case nir_jump_halt:
|
||||
|
|
@ -2144,14 +2144,22 @@ jay_emit_if(struct nir_to_jay_state *nj, nir_if *nif)
|
|||
/* Pop */
|
||||
--nj->indent;
|
||||
|
||||
jay_block_add_successor(before_block, then_first);
|
||||
jay_block_add_successor(before_block, else_first);
|
||||
bool uniform = jay_is_uniform(condition);
|
||||
|
||||
/* Logical CFG edges */
|
||||
jay_block_add_successor(before_block, then_first, GPR);
|
||||
jay_block_add_successor(before_block, else_first, GPR);
|
||||
|
||||
if (!jay_block_ending_unconditional_jump(then_last))
|
||||
jay_block_add_successor(then_last, after_block);
|
||||
jay_block_add_successor(then_last, after_block, GPR);
|
||||
|
||||
if (!jay_block_ending_unconditional_jump(else_last))
|
||||
jay_block_add_successor(else_last, after_block);
|
||||
jay_block_add_successor(else_last, after_block, GPR);
|
||||
|
||||
/* For a non-uniform IF, we fall through both sides in the physical CFG */
|
||||
if (!uniform) {
|
||||
jay_block_add_successor(then_last, else_first, UGPR);
|
||||
}
|
||||
|
||||
nj->after_block = after_block;
|
||||
|
||||
|
|
@ -2183,7 +2191,7 @@ jay_emit_loop(struct nir_to_jay_state *nj, nir_loop *nloop)
|
|||
loop_header->loop_header = true;
|
||||
|
||||
/* The current block falls through to the start of the loop */
|
||||
jay_block_add_successor(nj->current_block, loop_header);
|
||||
jay_block_add_successor(nj->current_block, loop_header, GPR);
|
||||
|
||||
/* Emit the loop body */
|
||||
nj->after_block = loop_header;
|
||||
|
|
@ -2194,7 +2202,7 @@ jay_emit_loop(struct nir_to_jay_state *nj, nir_loop *nloop)
|
|||
if (jump && jump->op == JAY_OPCODE_BREAK) {
|
||||
jump->op = JAY_OPCODE_LOOP_ONCE;
|
||||
} else {
|
||||
jay_block_add_successor(nj->current_block, loop_header);
|
||||
jay_block_add_successor(nj->current_block, loop_header, GPR);
|
||||
jay_WHILE(b);
|
||||
}
|
||||
|
||||
|
|
@ -2508,9 +2516,9 @@ jay_setup_payload(struct nir_to_jay_state *nj)
|
|||
}
|
||||
|
||||
/*
|
||||
* NIR sometimes contains unreachable blocks (e.g. due to infinite loops). These
|
||||
* blocks have no predecessors, but do have successors and can contribute to
|
||||
* phis. They are dead and violate the IR invariant:
|
||||
* NIR sometimes contains logically unreachable blocks (e.g. due to infinite
|
||||
* loops). These blocks have no predecessors, but do have successors and can
|
||||
* contribute to phis. They are dead and violate the IR invariant:
|
||||
*
|
||||
* Live-in sources are live-out in all predecessors.
|
||||
*
|
||||
|
|
@ -2530,16 +2538,23 @@ jay_remove_unreachable_blocks(jay_function *func)
|
|||
|
||||
jay_foreach_block(func, pred) {
|
||||
if (pred != jay_first_block(func) &&
|
||||
jay_num_predecessors(pred) == 0 &&
|
||||
jay_num_successors(pred) > 0) {
|
||||
jay_num_predecessors(pred, GPR) == 0 &&
|
||||
jay_num_successors(pred, GPR) > 0) {
|
||||
|
||||
jay_foreach_successor(pred, succ) {
|
||||
util_dynarray_delete_unordered(&succ->predecessors, jay_block *,
|
||||
jay_foreach_successor(pred, succ, GPR) {
|
||||
util_dynarray_delete_unordered(&succ->logical_preds, jay_block *,
|
||||
pred);
|
||||
}
|
||||
|
||||
pred->successors[0] = NULL;
|
||||
pred->successors[1] = NULL;
|
||||
jay_foreach_successor(pred, succ, UGPR) {
|
||||
util_dynarray_delete_unordered(&succ->physical_preds,
|
||||
jay_block *, pred);
|
||||
}
|
||||
|
||||
pred->logical_succs[0] = NULL;
|
||||
pred->logical_succs[1] = NULL;
|
||||
pred->physical_succs[0] = NULL;
|
||||
pred->physical_succs[1] = NULL;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ jay_insert_fp_mode(jay_shader *shader, uint32_t api, uint32_t float_sizes)
|
|||
}
|
||||
|
||||
/* Restore to global state on block boundaries */
|
||||
if (jay_num_successors(block) > 0) {
|
||||
if (jay_num_successors(block, GPR) > 0) {
|
||||
set_cr0(func, jay_after_block(block), ¤t, cr0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -443,16 +443,19 @@ jay_abs(jay_def src)
|
|||
return src;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
jay_file_is_uniform(enum jay_file file)
|
||||
{
|
||||
return file == UGPR || file == UFLAG || file == UACCUM || file == J_IMM;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given source reads the same value in all lanes.
|
||||
*/
|
||||
static inline bool
|
||||
jay_is_uniform(jay_def d)
|
||||
{
|
||||
return d.file == UGPR ||
|
||||
d.file == UFLAG ||
|
||||
d.file == UACCUM ||
|
||||
jay_is_imm(d);
|
||||
return jay_file_is_uniform(d.file);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1077,8 +1080,8 @@ typedef struct jay_block {
|
|||
struct list_head instructions;
|
||||
|
||||
/** Control flow graph */
|
||||
struct jay_block *successors[2];
|
||||
struct util_dynarray predecessors;
|
||||
struct jay_block *logical_succs[2], *physical_succs[2];
|
||||
struct util_dynarray logical_preds, physical_preds;
|
||||
|
||||
/** Index of the block in source order */
|
||||
unsigned index;
|
||||
|
|
@ -1112,7 +1115,8 @@ jay_new_block(jay_function *f)
|
|||
{
|
||||
jay_block *block = rzalloc(f, jay_block);
|
||||
|
||||
util_dynarray_init(&block->predecessors, block);
|
||||
util_dynarray_init(&block->logical_preds, block);
|
||||
util_dynarray_init(&block->physical_preds, block);
|
||||
list_inithead(&block->instructions);
|
||||
|
||||
block->index = f->num_blocks++;
|
||||
|
|
@ -1137,26 +1141,42 @@ jay_block_ending_jump(jay_block *block)
|
|||
return last && jay_op_is_control_flow(last->op) ? last : NULL;
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
jay_num_predecessors(jay_block *block)
|
||||
static inline struct util_dynarray *
|
||||
jay_predecessors(jay_block *blk, enum jay_file file)
|
||||
{
|
||||
return util_dynarray_num_elements(&block->predecessors, jay_block *);
|
||||
return jay_file_is_uniform(file) ? &blk->physical_preds :
|
||||
&blk->logical_preds;
|
||||
}
|
||||
|
||||
static inline jay_block **
|
||||
jay_successors(jay_block *blk, enum jay_file file)
|
||||
{
|
||||
return jay_file_is_uniform(file) ? blk->physical_succs : blk->logical_succs;
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
jay_num_successors(jay_block *block)
|
||||
jay_num_predecessors(jay_block *blk, enum jay_file file)
|
||||
{
|
||||
static_assert(ARRAY_SIZE(block->successors) == 2);
|
||||
return !!block->successors[0] + !!block->successors[1];
|
||||
return util_dynarray_num_elements(jay_predecessors(blk, file), jay_block *);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
jay_num_successors(jay_block *block, enum jay_file file)
|
||||
{
|
||||
static_assert(ARRAY_SIZE(block->logical_succs) == 2);
|
||||
static_assert(ARRAY_SIZE(block->physical_succs) == 2);
|
||||
|
||||
return !!jay_successors(block, file)[0] + !!jay_successors(block, file)[1];
|
||||
}
|
||||
|
||||
static inline jay_block *
|
||||
jay_first_predecessor(jay_block *block)
|
||||
jay_first_predecessor(jay_block *block, enum jay_file file)
|
||||
{
|
||||
if (jay_num_predecessors(block) == 0)
|
||||
if (jay_num_predecessors(block, file) == 0)
|
||||
return NULL;
|
||||
|
||||
return *util_dynarray_element(&block->predecessors, struct jay_block *, 0);
|
||||
return *util_dynarray_element(jay_predecessors(block, file),
|
||||
struct jay_block *, 0);
|
||||
}
|
||||
|
||||
/* Block worklist helpers */
|
||||
|
|
@ -1230,14 +1250,23 @@ jay_first_predecessor(jay_block *block)
|
|||
jay_foreach_function(s, func) \
|
||||
jay_foreach_inst_in_func_safe(func, v_block, inst)
|
||||
|
||||
#define jay_foreach_successor(blk, v) \
|
||||
jay_block *v; \
|
||||
jay_block **_v; \
|
||||
for (_v = (jay_block **) &blk->successors[0], v = *_v; \
|
||||
v != NULL && _v < (jay_block **) &blk->successors[2]; _v++, v = *_v)
|
||||
/*
|
||||
* Get the next successor, using the fact that there are at most 2 successors
|
||||
* and NULL successors cannot precede non-NULL successors.
|
||||
*/
|
||||
static inline jay_block *
|
||||
jay_next_successor(jay_block *parent, enum jay_file file, jay_block *it)
|
||||
{
|
||||
jay_block **succs = jay_successors(parent, file);
|
||||
return succs[0] == it ? succs[1] : NULL;
|
||||
}
|
||||
|
||||
#define jay_foreach_predecessor(blk, v) \
|
||||
util_dynarray_foreach(&blk->predecessors, jay_block *, v)
|
||||
#define jay_foreach_successor(blk, v, file) \
|
||||
for (jay_block *v = jay_successors(blk, file)[0]; v != NULL; \
|
||||
v = jay_next_successor(blk, file, v))
|
||||
|
||||
#define jay_foreach_predecessor(blk, v, file) \
|
||||
util_dynarray_foreach(jay_predecessors(blk, file), jay_block *, v)
|
||||
|
||||
#define jay_foreach_src(inst, s) for (unsigned s = 0; s < inst->num_srcs; ++s)
|
||||
|
||||
|
|
@ -1343,15 +1372,21 @@ jay_next_block(jay_block *block)
|
|||
}
|
||||
|
||||
static inline void
|
||||
jay_block_add_successor(jay_block *block, jay_block *succ)
|
||||
jay_block_add_successor(jay_block *block, jay_block *succ, enum jay_file file)
|
||||
{
|
||||
unsigned i = block->successors[0] ? 1 : 0;
|
||||
jay_block **succs = jay_successors(block, file);
|
||||
unsigned i = succs[0] ? 1 : 0;
|
||||
|
||||
assert(succ && block->successors[0] != succ && block->successors[1] != succ);
|
||||
assert(block->successors[i] == NULL && "at most 2 successors");
|
||||
assert(succ && succs[0] != succ && succs[1] != succ);
|
||||
assert(succs[i] == NULL && "at most 2 successors");
|
||||
|
||||
block->successors[i] = succ;
|
||||
util_dynarray_append(&(succ->predecessors), block);
|
||||
succs[i] = succ;
|
||||
util_dynarray_append(jay_predecessors(succ, file), block);
|
||||
|
||||
/* All logical CFG edges are also physical CFG edges */
|
||||
if (file == GPR) {
|
||||
jay_block_add_successor(block, succ, UGPR);
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ jay_compute_liveness(jay_function *f)
|
|||
|
||||
ralloc_free(f->dead_defs);
|
||||
f->dead_defs = BITSET_RZALLOC(f, f->ssa_alloc);
|
||||
BITSET_WORD *uniform = BITSET_CALLOC(f->ssa_alloc);
|
||||
|
||||
jay_foreach_block(f, block) {
|
||||
u_sparse_bitset_free(&block->live_in);
|
||||
|
|
@ -77,6 +78,14 @@ jay_compute_liveness(jay_function *f)
|
|||
jay_worklist_push_head(&worklist, block);
|
||||
}
|
||||
|
||||
jay_foreach_inst_in_func(f, _, I) {
|
||||
jay_foreach_dst_index(I, dst, index) {
|
||||
if (jay_is_uniform(dst)) {
|
||||
BITSET_SET(uniform, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (!u_worklist_is_empty(&worklist)) {
|
||||
/* Pop in reverse order since liveness is a backwards pass */
|
||||
jay_block *block = jay_worklist_pop_head(&worklist);
|
||||
|
|
@ -94,9 +103,20 @@ jay_compute_liveness(jay_function *f)
|
|||
/* Propagate block->live_in[] to the live_out[] of predecessors. Since
|
||||
* phis are split, they are handled naturally without special cases.
|
||||
*/
|
||||
jay_foreach_predecessor(block, p) {
|
||||
if (u_sparse_bitset_merge(&(*p)->live_out, &block->live_in)) {
|
||||
jay_worklist_push_tail(&worklist, *p);
|
||||
for (enum jay_file file = GPR; file <= UGPR; ++file) {
|
||||
jay_foreach_predecessor(block, p, file) {
|
||||
bool progress = false;
|
||||
|
||||
U_SPARSE_BITSET_FOREACH_SET(&block->live_in, i) {
|
||||
if ((file == UGPR) == BITSET_TEST(uniform, i)) {
|
||||
progress |= !u_sparse_bitset_test(&(*p)->live_out, i);
|
||||
u_sparse_bitset_set(&(*p)->live_out, i);
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
jay_worklist_push_tail(&worklist, *p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -110,6 +130,7 @@ jay_compute_liveness(jay_function *f)
|
|||
#endif
|
||||
|
||||
u_worklist_fini(&worklist);
|
||||
free(uniform);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ jay_lower_spill(jay_function *func)
|
|||
}
|
||||
|
||||
/* Canonicalize our internal registers at block boundaries */
|
||||
if (jay_num_successors(block) > 0) {
|
||||
if (jay_num_successors(block, GPR) > 0) {
|
||||
if (!address_valid) {
|
||||
jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,11 +44,11 @@ opt_predicate(jay_function *f, jay_block *block)
|
|||
|
||||
/* If's fallthrough to the then */
|
||||
jay_block *then_block = jay_next_block(block);
|
||||
assert(block->successors[0] == then_block && "successors for if");
|
||||
assert(block->logical_succs[0] == then_block && "successors for if");
|
||||
|
||||
/* We're searching for a single block then, so the next block is else */
|
||||
jay_block *else_block = jay_next_block(then_block);
|
||||
if (block->successors[1] != else_block ||
|
||||
if (block->logical_succs[1] != else_block ||
|
||||
list_length(&then_block->instructions) > 3 ||
|
||||
!list_is_singular(&else_block->instructions))
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -249,10 +249,15 @@ jay_print_block(FILE *fp, jay_block *block)
|
|||
fprintf(fp, "B%d%s%s", block->index, block->uniform ? " [uniform]" : "",
|
||||
block->loop_header ? " [loop header]" : "");
|
||||
bool first = true;
|
||||
jay_foreach_predecessor(block, p) {
|
||||
jay_foreach_predecessor(block, p, GPR) {
|
||||
fprintf(fp, "%s B%d", first ? " <-" : "", (*p)->index);
|
||||
first = false;
|
||||
}
|
||||
first = true;
|
||||
jay_foreach_predecessor(block, p, UGPR) {
|
||||
fprintf(fp, "%s B%d", first ? " <=" : "", (*p)->index);
|
||||
first = false;
|
||||
}
|
||||
fprintf(fp, " {\n");
|
||||
|
||||
/* We group phi destinations/sources for legibility */
|
||||
|
|
@ -281,11 +286,14 @@ jay_print_block(FILE *fp, jay_block *block)
|
|||
indent(fp, block, false);
|
||||
fprintf(fp, "}");
|
||||
first = true;
|
||||
jay_foreach_successor(block, succ) {
|
||||
if (succ) {
|
||||
fprintf(fp, "%s B%d", first ? " ->" : "", succ->index);
|
||||
first = false;
|
||||
}
|
||||
jay_foreach_successor(block, succ, GPR) {
|
||||
fprintf(fp, "%s B%d", first ? " ->" : "", succ->index);
|
||||
first = false;
|
||||
}
|
||||
first = true;
|
||||
jay_foreach_successor(block, succ, UGPR) {
|
||||
fprintf(fp, "%s B%d", first ? " =>" : "", succ->index);
|
||||
first = false;
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,7 +66,8 @@ static bool
|
|||
try_remove_trivial_phi(struct ctx *ctx, struct phi *phi)
|
||||
{
|
||||
unsigned same = 0;
|
||||
for (unsigned i = 0; i < jay_num_predecessors(phi->block); ++i) {
|
||||
for (unsigned i = 0; i < jay_num_predecessors(phi->block, phi->old.file);
|
||||
++i) {
|
||||
unsigned src = remap_idx(ctx, phi->src[i]);
|
||||
if (same && src != same && src != phi->dst) {
|
||||
/* Nontrivial */
|
||||
|
|
@ -86,9 +87,9 @@ try_remove_trivial_phi(struct ctx *ctx, struct phi *phi)
|
|||
static void
|
||||
add_phi(struct ctx *ctx, jay_block *block, jay_def src, unsigned dst)
|
||||
{
|
||||
unsigned i = 0, n = jay_num_predecessors(block);
|
||||
unsigned i = 0, n = jay_num_predecessors(block, src.file);
|
||||
unsigned *srcs = linear_alloc_array(ctx->linctx, unsigned, n);
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
jay_foreach_predecessor(block, pred, src.file) {
|
||||
assert(i < n);
|
||||
srcs[i++] = lookup(ctx, *pred, src);
|
||||
}
|
||||
|
|
@ -110,9 +111,10 @@ lookup(struct ctx *ctx, jay_block *block, jay_def def)
|
|||
}
|
||||
|
||||
/* For a single predecessor, we can recurse without adding a phi. */
|
||||
bool insert_phi = jay_num_predecessors(block) > 1;
|
||||
unsigned val = insert_phi ? ctx->alloc++ :
|
||||
lookup(ctx, jay_first_predecessor(block), def);
|
||||
bool insert_phi = jay_num_predecessors(block, def.file) > 1;
|
||||
unsigned val = insert_phi ?
|
||||
ctx->alloc++ :
|
||||
lookup(ctx, jay_first_predecessor(block, def.file), def);
|
||||
|
||||
_mesa_hash_table_u64_insert(ctx->defs[block->index], jay_index(def),
|
||||
(void *) (uintptr_t) val);
|
||||
|
|
@ -190,7 +192,7 @@ jay_repair_ssa(jay_function *func)
|
|||
}
|
||||
|
||||
/* Seal loop headers after processing the back edge */
|
||||
jay_foreach_successor(block, succ) {
|
||||
jay_foreach_successor(block, succ, GPR) {
|
||||
if (succ->loop_header && succ->index <= block->index) {
|
||||
util_dynarray_foreach(&ctx.incomplete_phis[succ->index],
|
||||
struct incomplete_phi, el) {
|
||||
|
|
@ -235,7 +237,7 @@ jay_repair_ssa(jay_function *func)
|
|||
jay_PHI_DST(&b, jay_scalar(phi->old.file, phi->dst));
|
||||
|
||||
unsigned i = 0;
|
||||
jay_foreach_predecessor(phi->block, pred) {
|
||||
jay_foreach_predecessor(phi->block, pred, phi->old.file) {
|
||||
b.cursor = jay_before_jump(*pred);
|
||||
unsigned idx = remap_idx(&ctx, phi->src[i++]);
|
||||
jay_PHI_SRC_u32(&b, jay_scalar(phi->old.file, idx), phi->dst);
|
||||
|
|
|
|||
|
|
@ -352,7 +352,7 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
/* Anything assumed to be spilled in succ must be spilled along all edges. */
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ss->S_in, v) {
|
||||
if (!u_sparse_bitset_test(&sp->S_out, v)) {
|
||||
b.cursor = jay_along_edge(pred, succ);
|
||||
b.cursor = jay_along_edge(pred, succ, GPR /* XXX */);
|
||||
insert_spill(&b, ctx, v);
|
||||
}
|
||||
}
|
||||
|
|
@ -372,8 +372,9 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
!u_sparse_bitset_test(&sp->W_out, src)) {
|
||||
|
||||
/* Fill the phi source in the predecessor */
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ), src);
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ, ctx->file);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ, ctx->file),
|
||||
src);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -385,8 +386,9 @@ insert_coupling_code(struct spill_ctx *ctx, jay_block *pred, jay_block *succ)
|
|||
if (!u_sparse_bitset_test(&sp->W_out, v) &&
|
||||
!u_sparse_bitset_test(&ctx->phi_set, v)) {
|
||||
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ);
|
||||
insert_reload(ctx, reload_block, jay_along_edge(pred, succ), v);
|
||||
jay_block *reload_block = jay_edge_to_block(pred, succ, GPR /* XXX */);
|
||||
insert_reload(ctx, reload_block,
|
||||
jay_along_edge(pred, succ, GPR /* XXX */), v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -598,7 +600,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
|||
U_SPARSE_BITSET_FOREACH_SET(&ctx->N, i) {
|
||||
bool all = true, any = false;
|
||||
|
||||
jay_foreach_predecessor(block, P) {
|
||||
jay_foreach_predecessor(block, P, ctx->file) {
|
||||
bool in = u_sparse_bitset_test(&ctx->blocks[(*P)->index].W_out, i);
|
||||
all &= in;
|
||||
any |= in;
|
||||
|
|
@ -612,7 +614,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
jay_foreach_predecessor(block, pred, ctx->file) {
|
||||
jay_foreach_phi_src_in_block(*pred, I) {
|
||||
if (!u_sparse_bitset_test(&ctx->blocks[(*pred)->index].W_out,
|
||||
jay_index(I->src[0]))) {
|
||||
|
|
@ -655,7 +657,7 @@ compute_w_entry(struct spill_ctx *ctx, jay_block *block)
|
|||
static ATTRIBUTE_NOINLINE void
|
||||
compute_s_entry(struct spill_ctx *ctx, jay_block *block)
|
||||
{
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
jay_foreach_predecessor(block, pred, ctx->file) {
|
||||
U_SPARSE_BITSET_FOREACH_SET(&ctx->blocks[(*pred)->index].S_out, v) {
|
||||
if (u_sparse_bitset_test(&block->live_in, v)) {
|
||||
u_sparse_bitset_set(&ctx->S, v);
|
||||
|
|
@ -731,7 +733,7 @@ global_next_use_distances(struct spill_ctx *ctx, void *memctx)
|
|||
}
|
||||
|
||||
/* Propagate successor live-in to pred live-out, joining with min */
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
jay_foreach_predecessor(block, pred, ctx->file) {
|
||||
if (minimum_next_uses(&ctx->blocks[(*pred)->index].next_use_out,
|
||||
&sb->next_use_in, ctx->next_uses,
|
||||
&ctx->phi_set)) {
|
||||
|
|
@ -838,7 +840,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k)
|
|||
|
||||
if (block->loop_header) {
|
||||
compute_w_entry_loop_header(&ctx, block);
|
||||
} else if (jay_num_predecessors(block) /* skip start blocks */) {
|
||||
} else if (jay_num_predecessors(block, file) /* skip start blocks */) {
|
||||
compute_w_entry(&ctx, block);
|
||||
}
|
||||
|
||||
|
|
@ -851,7 +853,7 @@ jay_spill(jay_function *func, enum jay_file file, unsigned k)
|
|||
|
||||
/* Now that all blocks are processed separately, stitch it together */
|
||||
jay_foreach_block(func, block) {
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
jay_foreach_predecessor(block, pred, file) {
|
||||
u_sparse_bitset_clear_all(&ctx.phi_set);
|
||||
insert_coupling_code(&ctx, *pred, block);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -264,21 +264,30 @@ jay_validate_function(struct validate_state *validate)
|
|||
validate->block = block;
|
||||
validate->I = NULL;
|
||||
|
||||
CHECK(block->successors[0] || !block->successors[1]);
|
||||
CHECK(block->logical_succs[0] || !block->logical_succs[1]);
|
||||
|
||||
/* Post-RA we can remove physical jumps though they exist logically */
|
||||
if (block->successors[1] && !validate->post_ra) {
|
||||
if (block->logical_succs[1] && !validate->post_ra) {
|
||||
CHECK(jay_block_ending_jump(block) != NULL);
|
||||
}
|
||||
|
||||
bool uniform_phi = false;
|
||||
jay_foreach_phi_src_in_block(block, phi) {
|
||||
uniform_phi |= jay_is_uniform(phi->src[0]);
|
||||
}
|
||||
|
||||
/* If a block has multiple successors, and one of them has multiple
|
||||
* predecessors, then we've detected a critical edge.
|
||||
*/
|
||||
if (jay_num_successors(block) > 1 && !validate->post_ra) {
|
||||
jay_foreach_successor(block, succ) {
|
||||
if (jay_num_predecessors(succ) > 1) {
|
||||
chirp(validate, "Critical edge (B%u -> B%u) is not allowed",
|
||||
block->index, succ->index);
|
||||
for (enum jay_file file = GPR; file <= (uniform_phi ? UGPR : GPR);
|
||||
++file) {
|
||||
if (jay_num_successors(block, file) > 1 && !validate->post_ra) {
|
||||
jay_foreach_successor(block, succ, file) {
|
||||
if (jay_num_predecessors(succ, file) > 1) {
|
||||
chirp(validate, "%s critical edge (B%u -> B%u)",
|
||||
file == GPR ? "Logical" : "Physical", block->index,
|
||||
succ->index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -102,15 +102,15 @@ validate_block(jay_function *func, jay_block *block, struct regfile *blocks)
|
|||
*
|
||||
* dEQP-VK.graphicsfuzz.spv-stable-mergesort-dead-code
|
||||
*/
|
||||
bool loop_header = block->loop_header && jay_num_predecessors(block) > 1;
|
||||
bool loop_header =
|
||||
block->loop_header && jay_num_predecessors(block, GPR) > 1;
|
||||
|
||||
/* Initialize the register file based on predecessors. */
|
||||
/* Initialize with the exit state of any one predecessor */
|
||||
jay_block *first_pred = jay_first_predecessor(block);
|
||||
if (first_pred) {
|
||||
struct regfile *pred_rf = &blocks[first_pred->index];
|
||||
|
||||
jay_foreach_ssa_file(f) {
|
||||
jay_foreach_ssa_file(f) {
|
||||
jay_block *first_pred = jay_first_predecessor(block, f);
|
||||
if (first_pred) {
|
||||
struct regfile *pred_rf = &blocks[first_pred->index];
|
||||
memcpy(rf->r[f], pred_rf->r[f], rf->n[f] * sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
|
@ -121,10 +121,10 @@ validate_block(jay_function *func, jay_block *block, struct regfile *blocks)
|
|||
* values coming in from each block, it is considered undefined at the
|
||||
* start of the block.
|
||||
*/
|
||||
jay_foreach_predecessor(block, pred) {
|
||||
struct regfile *pred_rf = &blocks[(*pred)->index];
|
||||
jay_foreach_ssa_file(file) {
|
||||
jay_foreach_predecessor(block, pred, file) {
|
||||
struct regfile *pred_rf = &blocks[(*pred)->index];
|
||||
|
||||
jay_foreach_ssa_file(file) {
|
||||
for (unsigned r = 0; r < rf->n[file]; ++r) {
|
||||
if (*reg(rf, file, r) != *reg(pred_rf, file, r)) {
|
||||
*reg(rf, file, r) = 0;
|
||||
|
|
|
|||
|
|
@ -90,11 +90,11 @@ TEST_F(RepairSSA, IfElse)
|
|||
jay_block *C = jay_test_block(b->func);
|
||||
jay_block *D = jay_test_block(b->func);
|
||||
|
||||
jay_block_add_successor(A, B);
|
||||
jay_block_add_successor(A, C);
|
||||
jay_block_add_successor(A, B, UGPR);
|
||||
jay_block_add_successor(A, C, UGPR);
|
||||
|
||||
jay_block_add_successor(B, D);
|
||||
jay_block_add_successor(C, D);
|
||||
jay_block_add_successor(B, D, UGPR);
|
||||
jay_block_add_successor(C, D, UGPR);
|
||||
|
||||
b->cursor = jay_after_block(A);
|
||||
jay_IF(b);
|
||||
|
|
@ -140,12 +140,12 @@ TEST_F(RepairSSA, Loop)
|
|||
jay_block *D = jay_test_block(b->func);
|
||||
jay_block *E = jay_test_block(b->func);
|
||||
|
||||
jay_block_add_successor(H, A);
|
||||
jay_block_add_successor(A, B);
|
||||
jay_block_add_successor(A, C);
|
||||
jay_block_add_successor(B, E);
|
||||
jay_block_add_successor(C, D);
|
||||
jay_block_add_successor(D, A);
|
||||
jay_block_add_successor(H, A, GPR);
|
||||
jay_block_add_successor(A, B, GPR);
|
||||
jay_block_add_successor(A, C, GPR);
|
||||
jay_block_add_successor(B, E, GPR);
|
||||
jay_block_add_successor(C, D, GPR);
|
||||
jay_block_add_successor(D, A, GPR);
|
||||
|
||||
A->loop_header = true;
|
||||
|
||||
|
|
@ -189,11 +189,11 @@ TEST_F(RepairSSA, TrivialPhisOptimized)
|
|||
jay_block *C = jay_test_block(b->func);
|
||||
jay_block *D = jay_test_block(b->func);
|
||||
|
||||
jay_block_add_successor(A, B);
|
||||
jay_block_add_successor(A, C);
|
||||
jay_block_add_successor(A, B, UGPR);
|
||||
jay_block_add_successor(A, C, UGPR);
|
||||
|
||||
jay_block_add_successor(B, D);
|
||||
jay_block_add_successor(C, D);
|
||||
jay_block_add_successor(B, D, UGPR);
|
||||
jay_block_add_successor(C, D, UGPR);
|
||||
|
||||
b->cursor = jay_after_block(A);
|
||||
jay_def x = jay_MOV_u32(b, 0xcafe);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue