From 50994eeabff270a1aeccf7c104104d13a2018dfd Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Fri, 18 Jun 2021 16:13:34 +0200 Subject: [PATCH] ir3/sched: Convert to srcs/dsts arrays Also change the indexing in ir3_delayslots, so it's finally sane! To do this we also have to change foreach_ssa_src_n to index srcs instead of regs, so that the indexing stays in sync. Part-of: --- src/freedreno/ir3/ir3.h | 10 ++++----- src/freedreno/ir3/ir3_delay.c | 17 +++++++-------- src/freedreno/ir3/ir3_postsched.c | 12 +++++------ src/freedreno/ir3/ir3_sched.c | 35 ++++++++++++++----------------- 4 files changed, 34 insertions(+), 40 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index ca58233df89..76718a45a4f 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1356,12 +1356,12 @@ ir3_try_swap_signedness(opc_t opc, bool *can_swap) static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) { - return instr->regs_count + instr->deps_count; + return instr->srcs_count + instr->deps_count; } static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n) { - if (n >= instr->regs_count) + if (n >= instr->srcs_count) return true; return false; } @@ -1370,9 +1370,9 @@ static inline struct ir3_instruction ** __ssa_srcp_n(struct ir3_instruction *instr, unsigned n) { if (__is_false_dep(instr, n)) - return &instr->deps[n - instr->regs_count]; - if (ssa(instr->regs[n])) - return &instr->regs[n]->def->instr; + return &instr->deps[n - instr->srcs_count]; + if (ssa(instr->srcs[n])) + return &instr->srcs[n]->def->instr; return NULL; } diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 3dbc0f768eb..b6bc55b80cf 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -95,19 +95,16 @@ ir3_delayslots(struct ir3_instruction *assigner, is_mem(consumer)) { return 6; } else { - /* assigner and consumer are both alu */ - assert(n > 0); - /* In mergedregs mode, there is an extra 2-cycle penalty when half of * a full-reg is read as a half-reg or when a half-reg is read as a * full-reg. */ bool mismatched_half = - (assigner->regs[0]->flags & IR3_REG_HALF) != - (consumer->regs[n - 1]->flags & IR3_REG_HALF); + (assigner->dsts[0]->flags & IR3_REG_HALF) != + (consumer->srcs[n]->flags & IR3_REG_HALF); unsigned penalty = mismatched_half ? 2 : 0; if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && - (n == 3)) { + (n == 2)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1 + penalty; } else { @@ -188,7 +185,7 @@ ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr) unsigned d = 0; if (src->def && src->def->instr->block == block) { - d = delay_calc_srcn_prera(block, src->def->instr, instr, i+1); + d = delay_calc_srcn_prera(block, src->def->instr, instr, i); } delay = MAX2(delay, d); @@ -221,8 +218,8 @@ static unsigned delay_calc_srcn_postra(struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n, bool soft, bool mergedregs) { - struct ir3_register *src = consumer->regs[n]; - struct ir3_register *dst = assigner->regs[0]; + struct ir3_register *src = consumer->srcs[n]; + struct ir3_register *dst = assigner->dsts[0]; bool mismatched_half = (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF); @@ -322,7 +319,7 @@ delay_calc_postra(struct ir3_block *block, if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) continue; - unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n+1, soft, mergedregs); + unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n, soft, mergedregs); new_delay = MAX2(new_delay, src_delay); } } diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c index 3fbe97946ca..a764acb7ebb 100644 --- a/src/freedreno/ir3/ir3_postsched.c +++ b/src/freedreno/ir3/ir3_postsched.c @@ -450,12 +450,12 @@ calculate_deps(struct ir3_postsched_deps_state *state, if (reg->flags & IR3_REG_RELATIV) { /* mark entire array as read: */ for (unsigned j = 0; j < reg->size; j++) { - add_reg_dep(state, node, reg, reg->array.base + j, i + 1); + add_reg_dep(state, node, reg, reg->array.base + j, i); } } else { assert(reg->wrmask >= 1); u_foreach_bit (b, reg->wrmask) { - add_reg_dep(state, node, reg, reg->num + b, i + 1); + add_reg_dep(state, node, reg, reg->num + b, i); } } } @@ -466,7 +466,7 @@ calculate_deps(struct ir3_postsched_deps_state *state, /* And then after we update the state for what this instruction * wrote: */ - struct ir3_register *reg = node->instr->regs[0]; + struct ir3_register *reg = node->instr->dsts[0]; if (reg->flags & IR3_REG_RELATIV) { /* mark the entire array as written: */ for (unsigned i = 0; i < reg->size; i++) { @@ -694,16 +694,16 @@ is_self_mov(struct ir3_instruction *instr) if (!is_same_type_mov(instr)) return false; - if (instr->regs[0]->num != instr->regs[1]->num) + if (instr->dsts[0]->num != instr->srcs[0]->num) return false; - if (instr->regs[0]->flags & IR3_REG_RELATIV) + if (instr->dsts[0]->flags & IR3_REG_RELATIV) return false; if (instr->cat1.round != ROUND_ZERO) return false; - if (instr->regs[1]->flags & (IR3_REG_CONST | IR3_REG_IMMED | + if (instr->srcs[0]->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)) return false; diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 5a44ebe1bf8..7900af15a62 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -381,7 +381,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, struct ir3_instruction *indirect = ir->a0_users[i]; if (!indirect) continue; - if (indirect->address->def != instr->regs[0]) + if (indirect->address->def != instr->dsts[0]) continue; ready = could_sched(indirect, instr); } @@ -398,7 +398,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, struct ir3_instruction *indirect = ir->a1_users[i]; if (!indirect) continue; - if (indirect->address->def != instr->regs[0]) + if (indirect->address->def != instr->dsts[0]) continue; ready = could_sched(indirect, instr); } @@ -436,7 +436,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, * * We could do this by adding each bary.f instruction as * virtual ssa src for the kill instruction. But we have - * fixed length instr->regs[]. + * fixed length instr->srcs[]. * * TODO we could handle this by false-deps now, probably. */ @@ -508,7 +508,7 @@ live_effect(struct ir3_instruction *instr) * then count all it's other components too: */ if (n->collect) - new_live *= n->collect->regs_count - 1; + new_live *= n->collect->srcs_count; foreach_ssa_src_n (src, n, instr) { if (__is_false_dep(instr, n)) @@ -870,13 +870,13 @@ split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr, /* remap remaining instructions using current addr * to new addr: */ - if (indirect->address->def == (*addr)->regs[0]) { + if (indirect->address->def == (*addr)->dsts[0]) { if (!new_addr) { new_addr = split_instr(ctx, *addr); /* original addr is scheduled, but new one isn't: */ new_addr->flags &= ~IR3_INSTR_MARK; } - indirect->address->def = new_addr->regs[0]; + indirect->address->def = new_addr->dsts[0]; /* don't need to remove old dag edge since old addr is * already scheduled: */ @@ -919,13 +919,13 @@ split_pred(struct ir3_sched_ctx *ctx) * TODO is there ever a case when pred isn't first * (and only) src? */ - if (ssa(predicated->regs[1]) == ctx->pred) { + if (ssa(predicated->srcs[0]) == ctx->pred) { if (!new_pred) { new_pred = split_instr(ctx, ctx->pred); /* original pred is scheduled, but new one isn't: */ new_pred->flags &= ~IR3_INSTR_MARK; } - predicated->regs[1]->instr = new_pred; + predicated->srcs[0]->instr = new_pred; /* don't need to remove old dag edge since old pred is * already scheduled: */ @@ -977,13 +977,7 @@ sched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src, i dag_add_edge(&sn->dag, &n->dag, NULL); - /* There's a mismatch between the indices foreach_ssa_src_n uses and the - * indices that ir3_delayslots expects, and additionally we don't want to - * call it and get bogus answers on false dependencies. - */ - unsigned d = 0; - if (i < instr->regs_count) - d = ir3_delayslots(src, instr, i + 1, true); + unsigned d = ir3_delayslots(src, instr, i, true); n->delay = MAX2(n->delay, d); } @@ -1028,7 +1022,7 @@ is_output_only(struct ir3_instruction *instr) if (!writes_gpr(instr)) return false; - if (!(instr->regs[0]->flags & IR3_REG_SSA)) + if (!(instr->dsts[0]->flags & IR3_REG_SSA)) return false; foreach_ssa_use (use, instr) @@ -1240,9 +1234,12 @@ get_array_id(struct ir3_instruction *instr) * src or dst, ir3_cp should enforce this. */ - for (unsigned i = 0; i < instr->regs_count; i++) - if (instr->regs[i]->flags & IR3_REG_ARRAY) - return instr->regs[i]->array.id; + for (unsigned i = 0; i < instr->dsts_count; i++) + if (instr->dsts[i]->flags & IR3_REG_ARRAY) + return instr->dsts[i]->array.id; + for (unsigned i = 0; i < instr->srcs_count; i++) + if (instr->srcs[i]->flags & IR3_REG_ARRAY) + return instr->srcs[i]->array.id; unreachable("this was unexpected"); }