ir3/sched: Convert to srcs/dsts arrays

Also change the indexing in ir3_delayslots, so it's finally sane! To do
this we also have to change foreach_ssa_src_n to index srcs instead of
regs, so that the indexing stays in sync.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11469>
This commit is contained in:
Connor Abbott 2021-06-18 16:13:34 +02:00 committed by Marge Bot
parent 132dfacdcb
commit 50994eeabf
4 changed files with 34 additions and 40 deletions

View file

@ -1356,12 +1356,12 @@ ir3_try_swap_signedness(opc_t opc, bool *can_swap)
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
return instr->regs_count + instr->deps_count;
return instr->srcs_count + instr->deps_count;
}
static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
{
if (n >= instr->regs_count)
if (n >= instr->srcs_count)
return true;
return false;
}
@ -1370,9 +1370,9 @@ static inline struct ir3_instruction **
__ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
{
if (__is_false_dep(instr, n))
return &instr->deps[n - instr->regs_count];
if (ssa(instr->regs[n]))
return &instr->regs[n]->def->instr;
return &instr->deps[n - instr->srcs_count];
if (ssa(instr->srcs[n]))
return &instr->srcs[n]->def->instr;
return NULL;
}

View file

@ -95,19 +95,16 @@ ir3_delayslots(struct ir3_instruction *assigner,
is_mem(consumer)) {
return 6;
} else {
/* assigner and consumer are both alu */
assert(n > 0);
/* In mergedregs mode, there is an extra 2-cycle penalty when half of
* a full-reg is read as a half-reg or when a half-reg is read as a
* full-reg.
*/
bool mismatched_half =
(assigner->regs[0]->flags & IR3_REG_HALF) !=
(consumer->regs[n - 1]->flags & IR3_REG_HALF);
(assigner->dsts[0]->flags & IR3_REG_HALF) !=
(consumer->srcs[n]->flags & IR3_REG_HALF);
unsigned penalty = mismatched_half ? 2 : 0;
if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
(n == 3)) {
(n == 2)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 1 + penalty;
} else {
@ -188,7 +185,7 @@ ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr)
unsigned d = 0;
if (src->def && src->def->instr->block == block) {
d = delay_calc_srcn_prera(block, src->def->instr, instr, i+1);
d = delay_calc_srcn_prera(block, src->def->instr, instr, i);
}
delay = MAX2(delay, d);
@ -221,8 +218,8 @@ static unsigned
delay_calc_srcn_postra(struct ir3_instruction *assigner, struct ir3_instruction *consumer,
unsigned n, bool soft, bool mergedregs)
{
struct ir3_register *src = consumer->regs[n];
struct ir3_register *dst = assigner->regs[0];
struct ir3_register *src = consumer->srcs[n];
struct ir3_register *dst = assigner->dsts[0];
bool mismatched_half =
(src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);
@ -322,7 +319,7 @@ delay_calc_postra(struct ir3_block *block,
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
continue;
unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n+1, soft, mergedregs);
unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n, soft, mergedregs);
new_delay = MAX2(new_delay, src_delay);
}
}

View file

@ -450,12 +450,12 @@ calculate_deps(struct ir3_postsched_deps_state *state,
if (reg->flags & IR3_REG_RELATIV) {
/* mark entire array as read: */
for (unsigned j = 0; j < reg->size; j++) {
add_reg_dep(state, node, reg, reg->array.base + j, i + 1);
add_reg_dep(state, node, reg, reg->array.base + j, i);
}
} else {
assert(reg->wrmask >= 1);
u_foreach_bit (b, reg->wrmask) {
add_reg_dep(state, node, reg, reg->num + b, i + 1);
add_reg_dep(state, node, reg, reg->num + b, i);
}
}
}
@ -466,7 +466,7 @@ calculate_deps(struct ir3_postsched_deps_state *state,
/* And then after we update the state for what this instruction
* wrote:
*/
struct ir3_register *reg = node->instr->regs[0];
struct ir3_register *reg = node->instr->dsts[0];
if (reg->flags & IR3_REG_RELATIV) {
/* mark the entire array as written: */
for (unsigned i = 0; i < reg->size; i++) {
@ -694,16 +694,16 @@ is_self_mov(struct ir3_instruction *instr)
if (!is_same_type_mov(instr))
return false;
if (instr->regs[0]->num != instr->regs[1]->num)
if (instr->dsts[0]->num != instr->srcs[0]->num)
return false;
if (instr->regs[0]->flags & IR3_REG_RELATIV)
if (instr->dsts[0]->flags & IR3_REG_RELATIV)
return false;
if (instr->cat1.round != ROUND_ZERO)
return false;
if (instr->regs[1]->flags & (IR3_REG_CONST | IR3_REG_IMMED |
if (instr->srcs[0]->flags & (IR3_REG_CONST | IR3_REG_IMMED |
IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
return false;

View file

@ -381,7 +381,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
struct ir3_instruction *indirect = ir->a0_users[i];
if (!indirect)
continue;
if (indirect->address->def != instr->regs[0])
if (indirect->address->def != instr->dsts[0])
continue;
ready = could_sched(indirect, instr);
}
@ -398,7 +398,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
struct ir3_instruction *indirect = ir->a1_users[i];
if (!indirect)
continue;
if (indirect->address->def != instr->regs[0])
if (indirect->address->def != instr->dsts[0])
continue;
ready = could_sched(indirect, instr);
}
@ -436,7 +436,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
*
* We could do this by adding each bary.f instruction as
* virtual ssa src for the kill instruction. But we have
* fixed length instr->regs[].
* fixed length instr->srcs[].
*
* TODO we could handle this by false-deps now, probably.
*/
@ -508,7 +508,7 @@ live_effect(struct ir3_instruction *instr)
* then count all its other components too:
*/
if (n->collect)
new_live *= n->collect->regs_count - 1;
new_live *= n->collect->srcs_count;
foreach_ssa_src_n (src, n, instr) {
if (__is_false_dep(instr, n))
@ -870,13 +870,13 @@ split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
/* remap remaining instructions using current addr
* to new addr:
*/
if (indirect->address->def == (*addr)->regs[0]) {
if (indirect->address->def == (*addr)->dsts[0]) {
if (!new_addr) {
new_addr = split_instr(ctx, *addr);
/* original addr is scheduled, but new one isn't: */
new_addr->flags &= ~IR3_INSTR_MARK;
}
indirect->address->def = new_addr->regs[0];
indirect->address->def = new_addr->dsts[0];
/* don't need to remove old dag edge since old addr is
* already scheduled:
*/
@ -919,13 +919,13 @@ split_pred(struct ir3_sched_ctx *ctx)
* TODO is there ever a case when pred isn't first
* (and only) src?
*/
if (ssa(predicated->regs[1]) == ctx->pred) {
if (ssa(predicated->srcs[0]) == ctx->pred) {
if (!new_pred) {
new_pred = split_instr(ctx, ctx->pred);
/* original pred is scheduled, but new one isn't: */
new_pred->flags &= ~IR3_INSTR_MARK;
}
predicated->regs[1]->instr = new_pred;
predicated->srcs[0]->instr = new_pred;
/* don't need to remove old dag edge since old pred is
* already scheduled:
*/
@ -977,13 +977,7 @@ sched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src, i
dag_add_edge(&sn->dag, &n->dag, NULL);
/* There's a mismatch between the indices foreach_ssa_src_n uses and the
* indices that ir3_delayslots expects, and additionally we don't want to
* call it and get bogus answers on false dependencies.
*/
unsigned d = 0;
if (i < instr->regs_count)
d = ir3_delayslots(src, instr, i + 1, true);
unsigned d = ir3_delayslots(src, instr, i, true);
n->delay = MAX2(n->delay, d);
}
@ -1028,7 +1022,7 @@ is_output_only(struct ir3_instruction *instr)
if (!writes_gpr(instr))
return false;
if (!(instr->regs[0]->flags & IR3_REG_SSA))
if (!(instr->dsts[0]->flags & IR3_REG_SSA))
return false;
foreach_ssa_use (use, instr)
@ -1240,9 +1234,12 @@ get_array_id(struct ir3_instruction *instr)
* src or dst, ir3_cp should enforce this.
*/
for (unsigned i = 0; i < instr->regs_count; i++)
if (instr->regs[i]->flags & IR3_REG_ARRAY)
return instr->regs[i]->array.id;
for (unsigned i = 0; i < instr->dsts_count; i++)
if (instr->dsts[i]->flags & IR3_REG_ARRAY)
return instr->dsts[i]->array.id;
for (unsigned i = 0; i < instr->srcs_count; i++)
if (instr->srcs[i]->flags & IR3_REG_ARRAY)
return instr->srcs[i]->array.id;
unreachable("this was unexpected");
}