mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
ir3/sched: Convert to srcs/dsts arrays
Also change the indexing in ir3_delayslots, so it's finally sane! To do this we also have to change foreach_ssa_src_n to index srcs instead of regs, so that the indexing stays in sync. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11469>
This commit is contained in:
parent
132dfacdcb
commit
50994eeabf
4 changed files with 34 additions and 40 deletions
|
|
@ -1356,12 +1356,12 @@ ir3_try_swap_signedness(opc_t opc, bool *can_swap)
|
|||
|
||||
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
|
||||
{
|
||||
return instr->regs_count + instr->deps_count;
|
||||
return instr->srcs_count + instr->deps_count;
|
||||
}
|
||||
|
||||
static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
|
||||
{
|
||||
if (n >= instr->regs_count)
|
||||
if (n >= instr->srcs_count)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1370,9 +1370,9 @@ static inline struct ir3_instruction **
|
|||
__ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
|
||||
{
|
||||
if (__is_false_dep(instr, n))
|
||||
return &instr->deps[n - instr->regs_count];
|
||||
if (ssa(instr->regs[n]))
|
||||
return &instr->regs[n]->def->instr;
|
||||
return &instr->deps[n - instr->srcs_count];
|
||||
if (ssa(instr->srcs[n]))
|
||||
return &instr->srcs[n]->def->instr;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -95,19 +95,16 @@ ir3_delayslots(struct ir3_instruction *assigner,
|
|||
is_mem(consumer)) {
|
||||
return 6;
|
||||
} else {
|
||||
/* assigner and consumer are both alu */
|
||||
assert(n > 0);
|
||||
|
||||
/* In mergedregs mode, there is an extra 2-cycle penalty when half of
|
||||
* a full-reg is read as a half-reg or when a half-reg is read as a
|
||||
* full-reg.
|
||||
*/
|
||||
bool mismatched_half =
|
||||
(assigner->regs[0]->flags & IR3_REG_HALF) !=
|
||||
(consumer->regs[n - 1]->flags & IR3_REG_HALF);
|
||||
(assigner->dsts[0]->flags & IR3_REG_HALF) !=
|
||||
(consumer->srcs[n]->flags & IR3_REG_HALF);
|
||||
unsigned penalty = mismatched_half ? 2 : 0;
|
||||
if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
|
||||
(n == 3)) {
|
||||
(n == 2)) {
|
||||
/* special case, 3rd src to cat3 not required on first cycle */
|
||||
return 1 + penalty;
|
||||
} else {
|
||||
|
|
@ -188,7 +185,7 @@ ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr)
|
|||
unsigned d = 0;
|
||||
|
||||
if (src->def && src->def->instr->block == block) {
|
||||
d = delay_calc_srcn_prera(block, src->def->instr, instr, i+1);
|
||||
d = delay_calc_srcn_prera(block, src->def->instr, instr, i);
|
||||
}
|
||||
|
||||
delay = MAX2(delay, d);
|
||||
|
|
@ -221,8 +218,8 @@ static unsigned
|
|||
delay_calc_srcn_postra(struct ir3_instruction *assigner, struct ir3_instruction *consumer,
|
||||
unsigned n, bool soft, bool mergedregs)
|
||||
{
|
||||
struct ir3_register *src = consumer->regs[n];
|
||||
struct ir3_register *dst = assigner->regs[0];
|
||||
struct ir3_register *src = consumer->srcs[n];
|
||||
struct ir3_register *dst = assigner->dsts[0];
|
||||
bool mismatched_half =
|
||||
(src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);
|
||||
|
||||
|
|
@ -322,7 +319,7 @@ delay_calc_postra(struct ir3_block *block,
|
|||
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
|
||||
continue;
|
||||
|
||||
unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n+1, soft, mergedregs);
|
||||
unsigned src_delay = delay_calc_srcn_postra(assigner, consumer, n, soft, mergedregs);
|
||||
new_delay = MAX2(new_delay, src_delay);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -450,12 +450,12 @@ calculate_deps(struct ir3_postsched_deps_state *state,
|
|||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
/* mark entire array as read: */
|
||||
for (unsigned j = 0; j < reg->size; j++) {
|
||||
add_reg_dep(state, node, reg, reg->array.base + j, i + 1);
|
||||
add_reg_dep(state, node, reg, reg->array.base + j, i);
|
||||
}
|
||||
} else {
|
||||
assert(reg->wrmask >= 1);
|
||||
u_foreach_bit (b, reg->wrmask) {
|
||||
add_reg_dep(state, node, reg, reg->num + b, i + 1);
|
||||
add_reg_dep(state, node, reg, reg->num + b, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -466,7 +466,7 @@ calculate_deps(struct ir3_postsched_deps_state *state,
|
|||
/* And then after we update the state for what this instruction
|
||||
* wrote:
|
||||
*/
|
||||
struct ir3_register *reg = node->instr->regs[0];
|
||||
struct ir3_register *reg = node->instr->dsts[0];
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
/* mark the entire array as written: */
|
||||
for (unsigned i = 0; i < reg->size; i++) {
|
||||
|
|
@ -694,16 +694,16 @@ is_self_mov(struct ir3_instruction *instr)
|
|||
if (!is_same_type_mov(instr))
|
||||
return false;
|
||||
|
||||
if (instr->regs[0]->num != instr->regs[1]->num)
|
||||
if (instr->dsts[0]->num != instr->srcs[0]->num)
|
||||
return false;
|
||||
|
||||
if (instr->regs[0]->flags & IR3_REG_RELATIV)
|
||||
if (instr->dsts[0]->flags & IR3_REG_RELATIV)
|
||||
return false;
|
||||
|
||||
if (instr->cat1.round != ROUND_ZERO)
|
||||
return false;
|
||||
|
||||
if (instr->regs[1]->flags & (IR3_REG_CONST | IR3_REG_IMMED |
|
||||
if (instr->srcs[0]->flags & (IR3_REG_CONST | IR3_REG_IMMED |
|
||||
IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS |
|
||||
IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -381,7 +381,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
|
|||
struct ir3_instruction *indirect = ir->a0_users[i];
|
||||
if (!indirect)
|
||||
continue;
|
||||
if (indirect->address->def != instr->regs[0])
|
||||
if (indirect->address->def != instr->dsts[0])
|
||||
continue;
|
||||
ready = could_sched(indirect, instr);
|
||||
}
|
||||
|
|
@ -398,7 +398,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
|
|||
struct ir3_instruction *indirect = ir->a1_users[i];
|
||||
if (!indirect)
|
||||
continue;
|
||||
if (indirect->address->def != instr->regs[0])
|
||||
if (indirect->address->def != instr->dsts[0])
|
||||
continue;
|
||||
ready = could_sched(indirect, instr);
|
||||
}
|
||||
|
|
@ -436,7 +436,7 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
|
|||
*
|
||||
* We could do this by adding each bary.f instruction as
|
||||
* virtual ssa src for the kill instruction. But we have
|
||||
* fixed length instr->regs[].
|
||||
* fixed length instr->srcs[].
|
||||
*
|
||||
* TODO we could handle this by false-deps now, probably.
|
||||
*/
|
||||
|
|
@ -508,7 +508,7 @@ live_effect(struct ir3_instruction *instr)
|
|||
* then count all its other components too:
|
||||
*/
|
||||
if (n->collect)
|
||||
new_live *= n->collect->regs_count - 1;
|
||||
new_live *= n->collect->srcs_count;
|
||||
|
||||
foreach_ssa_src_n (src, n, instr) {
|
||||
if (__is_false_dep(instr, n))
|
||||
|
|
@ -870,13 +870,13 @@ split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
|
|||
/* remap remaining instructions using current addr
|
||||
* to new addr:
|
||||
*/
|
||||
if (indirect->address->def == (*addr)->regs[0]) {
|
||||
if (indirect->address->def == (*addr)->dsts[0]) {
|
||||
if (!new_addr) {
|
||||
new_addr = split_instr(ctx, *addr);
|
||||
/* original addr is scheduled, but new one isn't: */
|
||||
new_addr->flags &= ~IR3_INSTR_MARK;
|
||||
}
|
||||
indirect->address->def = new_addr->regs[0];
|
||||
indirect->address->def = new_addr->dsts[0];
|
||||
/* don't need to remove old dag edge since old addr is
|
||||
* already scheduled:
|
||||
*/
|
||||
|
|
@ -919,13 +919,13 @@ split_pred(struct ir3_sched_ctx *ctx)
|
|||
* TODO is there ever a case when pred isn't first
|
||||
* (and only) src?
|
||||
*/
|
||||
if (ssa(predicated->regs[1]) == ctx->pred) {
|
||||
if (ssa(predicated->srcs[0]) == ctx->pred) {
|
||||
if (!new_pred) {
|
||||
new_pred = split_instr(ctx, ctx->pred);
|
||||
/* original pred is scheduled, but new one isn't: */
|
||||
new_pred->flags &= ~IR3_INSTR_MARK;
|
||||
}
|
||||
predicated->regs[1]->instr = new_pred;
|
||||
predicated->srcs[0]->instr = new_pred;
|
||||
/* don't need to remove old dag edge since old pred is
|
||||
* already scheduled:
|
||||
*/
|
||||
|
|
@ -977,13 +977,7 @@ sched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src, i
|
|||
dag_add_edge(&sn->dag, &n->dag, NULL);
|
||||
|
||||
|
||||
/* There's a mismatch between the indices foreach_ssa_src_n uses and the
|
||||
* indices that ir3_delayslots expects, and additionally we don't want to
|
||||
* call it and get bogus answers on false dependencies.
|
||||
*/
|
||||
unsigned d = 0;
|
||||
if (i < instr->regs_count)
|
||||
d = ir3_delayslots(src, instr, i + 1, true);
|
||||
unsigned d = ir3_delayslots(src, instr, i, true);
|
||||
|
||||
n->delay = MAX2(n->delay, d);
|
||||
}
|
||||
|
|
@ -1028,7 +1022,7 @@ is_output_only(struct ir3_instruction *instr)
|
|||
if (!writes_gpr(instr))
|
||||
return false;
|
||||
|
||||
if (!(instr->regs[0]->flags & IR3_REG_SSA))
|
||||
if (!(instr->dsts[0]->flags & IR3_REG_SSA))
|
||||
return false;
|
||||
|
||||
foreach_ssa_use (use, instr)
|
||||
|
|
@ -1240,9 +1234,12 @@ get_array_id(struct ir3_instruction *instr)
|
|||
* src or dst, ir3_cp should enforce this.
|
||||
*/
|
||||
|
||||
for (unsigned i = 0; i < instr->regs_count; i++)
|
||||
if (instr->regs[i]->flags & IR3_REG_ARRAY)
|
||||
return instr->regs[i]->array.id;
|
||||
for (unsigned i = 0; i < instr->dsts_count; i++)
|
||||
if (instr->dsts[i]->flags & IR3_REG_ARRAY)
|
||||
return instr->dsts[i]->array.id;
|
||||
for (unsigned i = 0; i < instr->srcs_count; i++)
|
||||
if (instr->srcs[i]->flags & IR3_REG_ARRAY)
|
||||
return instr->srcs[i]->array.id;
|
||||
|
||||
unreachable("this was unexpected");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue