nv50: build proper phi functions in the first place

This commit is contained in:
Christoph Bumiller 2010-08-05 00:11:56 +02:00
parent 720e0c430d
commit aaa8802a22
5 changed files with 167 additions and 148 deletions

View file

@ -394,7 +394,7 @@ nv_nvi_delete(struct nv_instruction *nvi)
struct nv_basic_block *b = nvi->bb;
int j;
debug_printf("REM: "); nv_print_instruction(nvi);
/* debug_printf("REM: "); nv_print_instruction(nvi); */
for (j = 0; j < 5; ++j)
nv_reference(NULL, &nvi->src[j], NULL);
@ -477,5 +477,40 @@ nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
for (j = 0; j < b->num_in; ++j)
n += nvbb_dominated_by(b->in[j], d);
return n && (n == b->num_in);
return (n && (n == b->num_in)) ? 1 : 0;
}
/* check if bf (future) can be reached from bp (past) */
boolean
nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
struct nv_basic_block *bt)
{
if (bf == bp)
return TRUE;
if (bp == bt)
return FALSE;
if (bp->out[0] && bp->out[0] != bp &&
nvbb_reachable_by(bf, bp->out[0], bt))
return TRUE;
if (bp->out[1] && bp->out[1] != bp &&
nvbb_reachable_by(bf, bp->out[1], bt))
return TRUE;
return FALSE;
}
struct nv_basic_block *
nvbb_dom_frontier(struct nv_basic_block *b)
{
struct nv_basic_block *df = b->out[0];
assert(df);
while (nvbb_dominated_by(df, b) ||
(!nvbb_dominated_by(df->in[0], b) &&
(!df->in[1] || !nvbb_dominated_by(df->in[1], b)))) {
df = df->out[0];
assert(df);
}
assert(df);
return df;
}

View file

@ -426,6 +426,9 @@ void nv_nvi_delete(struct nv_instruction *);
void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *);
int nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);
boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *,
struct nv_basic_block *);
struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);
int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
struct nv_value *new_val);

View file

@ -771,6 +771,10 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (ik->src[4] || ir->src[4])
continue; /* don't mess with address registers */
if (ik->flags_src || ir->flags_src ||
ik->flags_def || ir->flags_def)
continue; /* and also not with flags, for now */
for (s = 0; s < 3; ++s) {
struct nv_value *a, *b;

View file

@ -43,25 +43,6 @@ struct nv_pc_pass {
uint pass_seq;
};
/* check if bf (future) can be reached from bp (past) */
static boolean
bb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
struct nv_basic_block *bt)
{
if (bf == bp)
return TRUE;
if (bp == bt)
return FALSE;
if (bp->out[0] && bp->out[0] != bp &&
bb_reachable_by(bf, bp->out[0], bt))
return TRUE;
if (bp->out[1] && bp->out[1] != bp &&
bb_reachable_by(bf, bp->out[1], bt))
return TRUE;
return FALSE;
}
static void
ranges_coalesce(struct nv_range *range)
{
@ -377,32 +358,13 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
do_join_values(ctx, a, b);
}
/* For phi functions with sources from blocks that are not direct predecessors,
* if such a source is to be used in an earlier predecessor, we need to add an
* additional phi function. Used when inserting the MOVs below.
*/
static struct nv_value *
propagate_phi(struct nv_pc *pc, struct nv_instruction *phi, int s)
{
struct nv_basic_block *b = pc->current_block;
struct nv_value *val = phi->src[s]->value;
struct nv_instruction *nvi = new_instruction(pc, NV_OP_PHI);
int i, k;
(nvi->def[0] = new_value(pc, val->reg.file, val->reg.type))->insn = nvi;
for (k = 0, i = 0; i < 4 && phi->src[i]; ++i) {
if (bb_reachable_by(b, phi->src[i]->value->insn->bb, b))
nvi->src[k++] = new_ref(pc, phi->src[i]->value);
}
return nvi->def[0];
}
/* For IF blocks without ELSE blocks, insert an empty block for the MOVs.
* Insert additional PHIs for cases where a direct MOV wouldn't be valid.
/* For each operand of each PHI in b, generate a new value by inserting a MOV
* at the end of the block it is coming from and replace the operand with its
* result. This eliminates liveness conflicts and enables us to let values be
* copied to the right register if such a conflict exists nonetheless.
*/
static int
pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *i, *ni;
struct nv_value *val;
@ -426,31 +388,36 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
if (p->exit->target == b) /* target to new else-block */
p->exit->target = pn;
for (j = 0; j < b->num_in; ++j) {
if (b->in[j] == p) {
b->in[j] = pn;
break;
}
}
b->in[n] = pn;
pn->out[0] = b;
pn->in[0] = p;
pn->num_in = 1;
}
ctx->pc->current_block = pn;
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
for (j = 0; j < 4 && i->src[j]; ++j) {
if (bb_reachable_by(pn, i->src[j]->value->insn->bb, b))
if (nvbb_reachable_by(p, i->src[j]->value->insn->bb, b))
break;
}
if (j >= 4 || !i->src[j])
continue;
val = i->src[j]->value;
if (!nvbb_dominated_by(pn, val->insn->bb))
nv_reference(ctx->pc, &i->src[j], propagate_phi(ctx->pc, i, j));
ni = new_instruction(ctx->pc, NV_OP_MOV);
/* TODO: insert instruction at correct position in the first place */
if (ni->prev && ni->prev->target)
nv_nvi_permute(ni->prev, ni);
ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
ni->def[0]->insn = ni;
ni->src[0] = new_ref(ctx->pc, val);
nv_reference(ctx->pc, &i->src[j], ni->def[0]);
}
if (pn != p && pn->exit) {
ctx->pc->current_block = b->in[n ? 0 : 1];
ni = new_instruction(ctx->pc, NV_OP_BRA);
@ -461,70 +428,11 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
for (j = 0; j < 2; ++j)
if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
pass_generate_phi_movs_1(ctx, b->out[j]);
pass_generate_phi_movs(ctx, b->out[j]);
return 0;
}
/* Now everything should be in order and we can insert the MOVs. */
static int
pass_generate_phi_movs_2(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *i, *mov;
struct nv_value *val;
struct nv_basic_block *p;
int n, j;
b->pass_seq = ctx->pc->pass_seq;
for (n = 0; n < b->num_in; ++n) {
ctx->pc->current_block = p = b->in[n];
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
for (j = 0; j < 4 && i->src[j]; ++j) {
if (bb_reachable_by(p, i->src[j]->value->insn->bb, b))
break;
}
if (j >= 4 || !i->src[j])
continue;
val = i->src[j]->value;
mov = new_instruction(ctx->pc, NV_OP_MOV);
/* TODO: insert instruction at correct position in the first place */
if (mov->prev && mov->prev->target)
nv_nvi_permute(mov->prev, mov);
mov->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
mov->def[0]->insn = mov;
mov->src[0] = new_ref(ctx->pc, val);
nv_reference(ctx->pc, &i->src[j], mov->def[0]);
}
}
for (j = 1; j >= 0; --j) /* different order for the sake of diversity */
if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
pass_generate_phi_movs_2(ctx, b->out[j]);
return 0;
}
/* For each operand of each PHI in b, generate a new value by inserting a MOV
* at the end of the block it is coming from and replace the operand with its
* result. This eliminates liveness conflicts and enables us to let values be
* copied to the right register if such a conflict exists nonetheless.
*/
static INLINE int
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
if (pass_generate_phi_movs_1(ctx, b))
return 1;
++ctx->pc->pass_seq;
return pass_generate_phi_movs_2(ctx, b);
}
static int
pass_join_values(struct nv_pc_pass *ctx, int iter)
{
@ -688,7 +596,7 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
break;
assert(i->src[j]->value->insn);
if (bb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
if (nvbb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
live_set_add(b, i->src[j]->value);
debug_printf("BB:%i liveset + %i\n", b->id, i->src[j]->value->n);
} else {
@ -774,7 +682,7 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
if (!i->src[s])
break;
assert(i->src[s]->value->insn);
if (bb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
live_set_add(b, i->src[s]->value);
else
live_set_rem(b, i->src[s]->value);
@ -978,7 +886,7 @@ nv_pc_exec_pass1(struct nv_pc *pc)
nv_print_program(ctx->pc->root);
ctx->insns = CALLOC(pc->num_instructions, sizeof(struct nv_instruction *));
ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
pc->pass_seq++;
ret = pass_generate_phi_movs(ctx, pc->root);

View file

@ -51,16 +51,22 @@ struct bld_value_stack {
};
static INLINE void
bld_push_value(struct bld_value_stack *stk)
bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
{
assert(!stk->size || (stk->body[stk->size - 1] != stk->top));
assert(!stk->size || (stk->body[stk->size - 1] != val));
if (!(stk->size % 8)) {
unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
}
stk->body[stk->size++] = stk->top;
stk->body[stk->size++] = val;
}
static INLINE void
bld_vals_push(struct bld_value_stack *stk)
{
bld_vals_push_val(stk, stk->top);
stk->top = NULL;
}
@ -72,7 +78,7 @@ bld_push_values(struct bld_value_stack *stacks, int n)
for (i = 0; i < n; ++i)
for (c = 0; c < 4; ++c)
if (stacks[i * 4 + c].top)
bld_push_value(&stacks[i * 4 + c]);
bld_vals_push(&stacks[i * 4 + c]);
}
#define FETCH_TEMP(i, c) (bld->tvs[i][c].top)
@ -121,6 +127,17 @@ struct bld_context {
uint num_immds;
};
static INLINE void
bld_warn_uninitialized(struct bld_context *bld, int kind,
struct bld_value_stack *stk, struct nv_basic_block *b)
{
long i = (stk - &bld->tvs[0][0]) / 4;
long c = (stk - &bld->tvs[0][0]) & 3;
debug_printf("WARNING: TEMP[%li].%li %s used uninitialized in BB:%i\n",
i, c, kind ? "may be" : "is", b->id);
}
static INLINE struct nv_value *
bld_def(struct nv_instruction *i, int c, struct nv_value *value)
{
@ -168,42 +185,91 @@ fetch_by_bb(struct bld_value_stack *stack,
fetch_by_bb(stack, vals, n, b->in[i]);
}
static INLINE struct nv_value *
bld_load_imm_u32(struct bld_context *bld, uint32_t u);
static struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
bld_phi(struct bld_context *bld, struct nv_basic_block *b,
struct bld_value_stack *stack)
{
struct nv_value *vals[16], *phi = NULL;
int j, i = 0, n = 0;
struct nv_basic_block *in;
struct nv_value *vals[16], *val;
struct nv_instruction *phi;
int i, j, n;
fetch_by_bb(stack, vals, &n, bld->pc->current_block);
do {
i = n = 0;
fetch_by_bb(stack, vals, &n, b);
if (!n) {
bld_warn_uninitialized(bld, 0, stack, b);
return NULL;
}
if (n == 1) {
if (nvbb_dominated_by(b, vals[0]->insn->bb))
break;
bld_warn_uninitialized(bld, 1, stack, b);
/* back-tracking to insert missing value of other path */
in = b;
while (in->in[0]) {
if (in->num_in == 1) {
in = in->in[0];
} else {
if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) {
in = in->in[0];
break;
}
if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) {
in = in->in[1];
break;
}
in = in->in[0];
}
}
bld->pc->current_block = in;
/* should make this a no-op */
bld_vals_push_val(stack, bld_load_imm_u32(bld, 0));
continue;
}
for (i = 0; i < n; ++i) {
if (nvbb_dominated_by(b, vals[i]->insn->bb))
continue;
for (j = 0; j < b->num_in; ++j)
if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
break;
if (j == b->num_in) {
in = nvbb_dom_frontier(vals[i]->insn->bb);
val = bld_phi(bld, in, stack);
bld_vals_push_val(stack, val);
break;
}
}
} while(i < n);
bld->pc->current_block = b;
if (n == 0)
return NULL;
if (n == 1)
return vals[0];
debug_printf("phi required: %i candidates\n", n);
phi = new_instruction(bld->pc, NV_OP_PHI);
while (i < n) {
struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_PHI);
bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
for (i = 0; i < n; ++i)
phi->src[i] = new_ref(bld->pc, vals[i]);
j = phi ? 1 : 0;
if (phi)
insn->src[0] = new_ref(bld->pc, phi);
return phi->def[0];
}
phi = new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type);
bld_def(insn, 0, phi);
for (; j < 4; ++j) {
insn->src[j] = new_ref(bld->pc, vals[i++]);
if (i == n)
break;
}
debug_printf("new phi: %i, %i in\n", phi->n, j);
}
/* insert_at_head(list, phi) is done at end of block */
return phi;
static INLINE struct nv_value *
bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
{
return bld_phi(bld, bld->pc->current_block, stack);
}
static INLINE struct nv_value *
@ -640,6 +706,9 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
for (i = 0; i < 4; ++i)
bld->saved_addr[i][0] = NULL;
for (i = 0; i < 128; ++i)
bld->saved_inputs[i] = NULL;
}
static struct nv_value *