From 40a1c4ba2d6ff13fae396b0baa569efaa426c041 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 23 Mar 2021 17:59:04 +0100
Subject: [PATCH] ir3/postsched: Fix ir3_postsched_node::delay calculation

This wasn't using the same calculation that add_reg_dep() was using to
get the index into state->regs, so it was using the wrong register. Fix
this by folding it into add_reg_dep().

This shouldn't fix anything, because it's just used for scheduler
priorities, but it should reduce nop's and syncs.

Part-of:
---
 src/freedreno/ir3/ir3_postsched.c | 48 +++++++++++++++++--------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c
index 012b3d68660..fe68cfe1bf5 100644
--- a/src/freedreno/ir3/ir3_postsched.c
+++ b/src/freedreno/ir3/ir3_postsched.c
@@ -362,10 +362,17 @@ add_dep(struct ir3_postsched_deps_state *state,
 
 static void
 add_single_reg_dep(struct ir3_postsched_deps_state *state,
-		struct ir3_postsched_node *node, unsigned num, bool write)
+		struct ir3_postsched_node *node, unsigned num, int src_n)
 {
-	add_dep(state, dep_reg(state, num), node);
-	if (write) {
+	struct ir3_postsched_node *dep = dep_reg(state, num);
+
+	if (src_n >= 0 && dep && state->direction == F) {
+		unsigned d = ir3_delayslots(dep->instr, node->instr, src_n, true);
+		node->delay = MAX2(node->delay, d);
+	}
+
+	add_dep(state, dep, node);
+	if (src_n < 0) {
 		dep_reg(state, num) = node;
 	}
 }
@@ -373,11 +380,17 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state,
 /* This is where we handled full vs half-precision, and potential conflicts
  * between half and full precision that result in additional dependencies.
  * The 'reg' arg is really just to know half vs full precision.
+ *
+ * If non-negative, then this adds a dependency on a source register, and
+ * src_n is the index passed into ir3_delayslots() for calculating the delay:
+ * 0 means this is for an address source, non-0 corresponds to
+ * node->instr->regs[src_n]. If negative, then this is for a destination
+ * register.
  */
 static void
 add_reg_dep(struct ir3_postsched_deps_state *state,
 		struct ir3_postsched_node *node, const struct ir3_register *reg,
-		unsigned num, bool write)
+		unsigned num, int src_n)
 {
 	if (state->merged) {
 		/* Make sure that special registers like a0.x that are written as
@@ -386,16 +399,16 @@ add_reg_dep(struct ir3_postsched_deps_state *state,
 		 */
 		if ((reg->flags & IR3_REG_HALF) && num < regid(48, 0)) {
 			/* single conflict in half-reg space: */
-			add_single_reg_dep(state, node, num, write);
+			add_single_reg_dep(state, node, num, src_n);
 		} else {
 			/* two conflicts in half-reg space: */
-			add_single_reg_dep(state, node, 2 * num + 0, write);
-			add_single_reg_dep(state, node, 2 * num + 1, write);
+			add_single_reg_dep(state, node, 2 * num + 0, src_n);
+			add_single_reg_dep(state, node, 2 * num + 1, src_n);
 		}
 	} else {
 		if (reg->flags & IR3_REG_HALF)
 			num += ARRAY_SIZE(state->regs) / 2;
-		add_single_reg_dep(state, node, num, write);
+		add_single_reg_dep(state, node, num, src_n);
 	}
 }
 
@@ -413,27 +426,20 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 		if (reg->flags & IR3_REG_RELATIV) {
 			/* mark entire array as read: */
 			struct ir3_array *arr = ir3_lookup_array(state->ctx->ir, reg->array.id);
-			for (unsigned i = 0; i < arr->length; i++) {
-				add_reg_dep(state, node, reg, arr->reg + i, false);
+			for (unsigned j = 0; j < arr->length; j++) {
+				add_reg_dep(state, node, reg, arr->reg + j, i + 1);
 			}
 		} else {
 			assert(reg->wrmask >= 1);
 			u_foreach_bit (b, reg->wrmask) {
-				add_reg_dep(state, node, reg, reg->num + b, false);
-
-				struct ir3_postsched_node *dep = dep_reg(state, reg->num + b);
-				if (dep && (state->direction == F)) {
-					unsigned d = ir3_delayslots(dep->instr, node->instr, i + 1, true);
-					node->delay = MAX2(node->delay, d);
-				}
+				add_reg_dep(state, node, reg, reg->num + b, i + 1);
 			}
 		}
 	}
 
 	if (node->instr->address) {
 		add_reg_dep(state, node, node->instr->address->regs[0],
-					node->instr->address->regs[0]->num,
-					false);
+					node->instr->address->regs[0]->num, 0);
 	}
 
 	if (dest_regs(node->instr) == 0)
@@ -447,12 +453,12 @@ calculate_deps(struct ir3_postsched_deps_state *state,
 			/* mark the entire array as written: */
 			struct ir3_array *arr = ir3_lookup_array(state->ctx->ir, reg->array.id);
 			for (unsigned i = 0; i < arr->length; i++) {
-				add_reg_dep(state, node, reg, arr->reg + i, true);
+				add_reg_dep(state, node, reg, arr->reg + i, -1);
 			}
 		} else {
 			assert(reg->wrmask >= 1);
 			u_foreach_bit (b, reg->wrmask) {
-				add_reg_dep(state, node, reg, reg->num + b, true);
+				add_reg_dep(state, node, reg, reg->num + b, -1);
 			}
 		}
 	}