ir3: Rewrite postsched dependency handling

Split up the dependencies into multiple register files, similar to RA, and
calculate the file + index for each register. This lets us remove the previous
hack we had and lets us handle half shared registers correctly.

The actual calculation of the file is moved into a shared
ir3_reg_file_offset() function so that it can be reused in other places
which have to check for overlapping registers.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
Connor Abbott 2021-10-07 12:43:39 +02:00 committed by Marge Bot
parent dbeeec2570
commit 750e6843c0
2 changed files with 101 additions and 43 deletions

View file

@ -1321,17 +1321,21 @@ dest_regs(struct ir3_instruction *instr)
return util_last_bit(instr->dsts[0]->wrmask); return util_last_bit(instr->dsts[0]->wrmask);
} }
/* Is reg a GPR, i.e. neither REG_A0 nor a register flagged as a predicate: */
static inline bool
is_reg_gpr(const struct ir3_register *reg)
{
   /* Same short-circuit order as before: the flags are only inspected when
    * the register number is not REG_A0.
    */
   return !(reg_num(reg) == REG_A0 || (reg->flags & IR3_REG_PREDICATE));
}
/* is dst a normal temp register: */ /* is dst a normal temp register: */
static inline bool static inline bool
is_dest_gpr(struct ir3_register *dst) is_dest_gpr(const struct ir3_register *dst)
{ {
if (dst->wrmask == 0) if (dst->wrmask == 0)
return false; return false;
if (reg_num(dst) == REG_A0) return is_reg_gpr(dst);
return false;
if (dst->flags & IR3_REG_PREDICATE)
return false;
return true;
} }
static inline bool static inline bool
@ -1383,8 +1387,50 @@ writes_pred(struct ir3_instruction *instr)
static inline bool static inline bool
is_reg_special(const struct ir3_register *reg) is_reg_special(const struct ir3_register *reg)
{ {
return (reg->flags & (IR3_REG_SHARED | IR3_REG_PREDICATE) || return (reg->flags & IR3_REG_SHARED) || !is_reg_gpr(reg);
(reg_num(reg) == REG_A0)); }
/* r0.x - r47.w are "normal" registers. r48.x - r55.w are shared registers.
* Everything above those are non-GPR registers like a0.x and p0.x that aren't
* assigned by RA.
*/
#define GPR_REG_SIZE (4 * 48)
#define SHARED_REG_START GPR_REG_SIZE
#define SHARED_REG_SIZE (4 * 8)
#define NONGPR_REG_START (SHARED_REG_START + SHARED_REG_SIZE)
#define NONGPR_REG_SIZE (4 * 8)
/* Register files as reported by ir3_reg_file_offset(). Two registers can only
 * alias each other if they are in the same file.
 */
enum ir3_reg_file {
/* Full GPRs, and also half GPRs when the register files are merged. */
IR3_FILE_FULL,
/* Half GPRs when the half and full register files are not merged. */
IR3_FILE_HALF,
/* GPRs flagged with IR3_REG_SHARED. */
IR3_FILE_SHARED,
/* Registers for which is_reg_gpr() returns false. */
IR3_FILE_NONGPR,
};
/* Return a file + offset that can be used for determining if two registers
 * alias. The register is only really used for its flags; the num is taken from
 * the parameter. Registers overlap if they are in the same file and have an
 * overlapping offset. The offset is multiplied by 2 for full registers to
 * handle aliasing half and full registers, that is, it is in units of
 * half-regs.
 *
 * reg:        register whose flags select the file and the element size
 * num:        register number the offset is computed from
 * mergedregs: when true, half registers are placed in IR3_FILE_FULL instead
 *             of IR3_FILE_HALF
 * file:       out parameter, always written with the selected file
 *
 * Returns the offset of the register within *file.
 */
static inline unsigned
ir3_reg_file_offset(const struct ir3_register *reg, unsigned num,
bool mergedregs, enum ir3_reg_file *file)
{
/* Scale factor applied to num; presumably 2 for full and 1 for half
 * registers, going by the description above -- confirm in reg_elem_size().
 */
unsigned size = reg_elem_size(reg);
if (!is_reg_gpr(reg)) {
/* Tested before the shared/half flags, so a non-GPR register is always
 * reported as IR3_FILE_NONGPR regardless of those flags.
 */
*file = IR3_FILE_NONGPR;
/* Rebase so that NONGPR_REG_START maps to offset 0.
 * NOTE(review): assumes num >= NONGPR_REG_START; the unsigned subtraction
 * wraps around otherwise.
 */
return (num - NONGPR_REG_START) * size;
} else if (reg->flags & IR3_REG_SHARED) {
*file = IR3_FILE_SHARED;
/* Rebase so that SHARED_REG_START maps to offset 0 (same assumption on
 * num as above).
 */
return (num - SHARED_REG_START) * size;
} else if (mergedregs || !(reg->flags & IR3_REG_HALF)) {
/* Full registers, or half registers sharing the file with full ones. */
*file = IR3_FILE_FULL;
return num * size;
} else {
/* Half register with a separate half file: num is used unscaled. */
*file = IR3_FILE_HALF;
return num;
}
} }
/* Same as above but in cases where we don't have a register. r48.x and above /* Same as above but in cases where we don't have a register. r48.x and above

View file

@ -362,24 +362,22 @@ struct ir3_postsched_deps_state {
* Note, this table is twice as big as the # of regs, to deal with * Note, this table is twice as big as the # of regs, to deal with
* half-precision regs. The approach differs depending on whether * half-precision regs. The approach differs depending on whether
* the half and full precision register files are "merged" (conflict, * the half and full precision register files are "merged" (conflict,
* ie. a6xx+) in which case we consider each full precision dep * ie. a6xx+) in which case we use "regs" for both full precision and half
* precision dependencies and consider each full precision dep
* as two half-precision dependencies, vs older separate (non- * as two half-precision dependencies, vs older separate (non-
* conflicting) in which case the first half of the table is used * conflicting) in which case the separate "half_regs" table is used for
* for full precision and 2nd half for half-precision. * half-precision deps. See ir3_reg_file_offset().
*/ */
struct ir3_postsched_node *regs[2 * 256]; struct ir3_postsched_node *regs[2 * GPR_REG_SIZE];
unsigned dst_n[2 * 256]; unsigned dst_n[2 * GPR_REG_SIZE];
struct ir3_postsched_node *half_regs[GPR_REG_SIZE];
unsigned half_dst_n[GPR_REG_SIZE];
struct ir3_postsched_node *shared_regs[2 * SHARED_REG_SIZE];
unsigned shared_dst_n[2 * SHARED_REG_SIZE];
struct ir3_postsched_node *nongpr_regs[2 * NONGPR_REG_SIZE];
unsigned nongpr_dst_n[2 * NONGPR_REG_SIZE];
}; };
/* bounds checking read/write accessors, since OoB access to stuff on
 * the stack is gonna cause a bad day.
 *
 * Expands to an lvalue for (state)->regs[(idx)]: the statement expression
 * asserts that idx is within the regs array and yields a pointer to the
 * element, which the leading '*' dereferences. The result can therefore be
 * both read and assigned to.
 *
 * Note that idx is evaluated twice (once in the assert, once in the array
 * access), so it must not have side effects.
 */
#define dep_reg(state, idx) \
*({ \
assert((idx) < ARRAY_SIZE((state)->regs)); \
&(state)->regs[(idx)]; \
})
static void static void
add_dep(struct ir3_postsched_deps_state *state, add_dep(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *before, struct ir3_postsched_node *after, struct ir3_postsched_node *before, struct ir3_postsched_node *after,
@ -399,10 +397,12 @@ add_dep(struct ir3_postsched_deps_state *state,
static void static void
add_single_reg_dep(struct ir3_postsched_deps_state *state, add_single_reg_dep(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *node, unsigned num, int src_n, struct ir3_postsched_node *node,
struct ir3_postsched_node **dep_ptr,
unsigned *dst_n_ptr, unsigned num, int src_n,
int dst_n) int dst_n)
{ {
struct ir3_postsched_node *dep = dep_reg(state, num); struct ir3_postsched_node *dep = *dep_ptr;
unsigned d = 0; unsigned d = 0;
if (src_n >= 0 && dep && state->direction == F) { if (src_n >= 0 && dep && state->direction == F) {
@ -419,8 +419,8 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state,
add_dep(state, dep, node, d); add_dep(state, dep, node, d);
if (src_n < 0) { if (src_n < 0) {
dep_reg(state, num) = node; *dep_ptr = node;
state->dst_n[num] = dst_n; *dst_n_ptr = dst_n;
} }
} }
@ -438,24 +438,36 @@ add_reg_dep(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *node, const struct ir3_register *reg, struct ir3_postsched_node *node, const struct ir3_register *reg,
unsigned num, int src_n, int dst_n) unsigned num, int src_n, int dst_n)
{ {
if (state->merged) { struct ir3_postsched_node **regs;
/* Make sure that special registers like a0.x that are written as unsigned *dst_n_ptr;
* half-registers don't alias random full registers by pretending that enum ir3_reg_file file;
* they're full registers: unsigned size = reg_elem_size(reg);
*/ unsigned offset = ir3_reg_file_offset(reg, num, state->merged, &file);
if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) { switch (file) {
/* single conflict in half-reg space: */ case IR3_FILE_FULL:
add_single_reg_dep(state, node, num, src_n, dst_n); assert(offset + size <= ARRAY_SIZE(state->regs));
} else { regs = state->regs;
/* two conflicts in half-reg space: */ dst_n_ptr = state->dst_n;
add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n); break;
add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n); case IR3_FILE_HALF:
} assert(offset + 1 <= ARRAY_SIZE(state->half_regs));
} else { regs = state->half_regs;
if (reg->flags & IR3_REG_HALF) dst_n_ptr = state->half_dst_n;
num += ARRAY_SIZE(state->regs) / 2; break;
add_single_reg_dep(state, node, num, src_n, dst_n); case IR3_FILE_SHARED:
assert(offset + size <= ARRAY_SIZE(state->shared_regs));
regs = state->shared_regs;
dst_n_ptr = state->shared_dst_n;
break;
case IR3_FILE_NONGPR:
assert(offset + size <= ARRAY_SIZE(state->nongpr_regs));
regs = state->nongpr_regs;
dst_n_ptr = state->nongpr_dst_n;
break;
} }
for (unsigned i = 0; i < size; i++)
add_single_reg_dep(state, node, &regs[offset + i], &dst_n_ptr[offset + i], num, src_n, dst_n);
} }
static void static void