ir3: Rewrite postsched dependency handling

Split up the dependency tracking into multiple register files, similar to
RA, and calculate the file + index for each register. This lets us remove
the previous hack we had and handle half shared registers correctly.

The actual calculation of the file is moved into a shared
ir3_reg_file_offset() function so that it can be reused in other places
which have to check for overlapping registers.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
Connor Abbott 2021-10-07 12:43:39 +02:00 committed by Marge Bot
parent dbeeec2570
commit 750e6843c0
2 changed files with 101 additions and 43 deletions

View file

@ -1321,17 +1321,21 @@ dest_regs(struct ir3_instruction *instr)
return util_last_bit(instr->dsts[0]->wrmask);
}
/* Whether reg is a general-purpose register, i.e. it is neither numbered
 * REG_A0 nor flagged as a predicate register.
 */
static inline bool
is_reg_gpr(const struct ir3_register *reg)
{
   return reg_num(reg) != REG_A0 && !(reg->flags & IR3_REG_PREDICATE);
}
/* Is dst a normal temp register: a destination that is actually written
 * (non-zero wrmask) and that is neither a0 nor a predicate register.
 *
 * NOTE(review): this span is a rendered diff with removed and added lines
 * interleaved. The non-const signature and the open-coded REG_A0 /
 * IR3_REG_PREDICATE checks ending in "return true" appear to be the removed
 * version; the const signature and the is_reg_gpr() call appear to be the
 * replacement.
 */
static inline bool
is_dest_gpr(struct ir3_register *dst)
is_dest_gpr(const struct ir3_register *dst)
{
/* A destination with an empty write mask writes nothing. */
if (dst->wrmask == 0)
return false;
if (reg_num(dst) == REG_A0)
return false;
if (dst->flags & IR3_REG_PREDICATE)
return false;
return true;
return is_reg_gpr(dst);
}
static inline bool
@ -1383,8 +1387,50 @@ writes_pred(struct ir3_instruction *instr)
/* Returns true for registers flagged IR3_REG_SHARED and for registers that
 * are not GPRs (numbered REG_A0 or flagged IR3_REG_PREDICATE).
 *
 * NOTE(review): rendered diff with removed and added lines interleaved. The
 * first (two-line) return appears to be the removed version and the second
 * return, written in terms of is_reg_gpr(), its replacement; both test the
 * same three conditions.
 */
static inline bool
is_reg_special(const struct ir3_register *reg)
{
return (reg->flags & (IR3_REG_SHARED | IR3_REG_PREDICATE) ||
(reg_num(reg) == REG_A0));
return (reg->flags & IR3_REG_SHARED) || !is_reg_gpr(reg);
}
/* r0.x - r47.w are "normal" registers. r48.x - r55.w are shared registers.
 * Everything above those are non-GPR registers like a0.x and p0.x that aren't
 * assigned by RA.
 *
 * All values below are in units of register components (4 per register:
 * .x/.y/.z/.w).
 */
/* 48 normal registers (r0 - r47) times 4 components. */
#define GPR_REG_SIZE (4 * 48)
/* Shared registers start right after the normal ones (r48.x). */
#define SHARED_REG_START GPR_REG_SIZE
/* 8 shared registers (r48 - r55) times 4 components. */
#define SHARED_REG_SIZE (4 * 8)
/* First component number past the shared registers. */
#define NONGPR_REG_START (SHARED_REG_START + SHARED_REG_SIZE)
/* 32 component slots reserved for the non-GPR registers. */
#define NONGPR_REG_SIZE (4 * 8)
/* Register files distinguished when checking whether two registers alias;
 * ir3_reg_file_offset() selects one of these for a given register.
 */
enum ir3_reg_file {
/* Non-shared GPRs: full registers, plus half registers when mergedregs is
 * set.
 */
IR3_FILE_FULL,
/* Non-shared half GPRs when mergedregs is not set. */
IR3_FILE_HALF,
/* GPRs flagged IR3_REG_SHARED. */
IR3_FILE_SHARED,
/* Registers for which is_reg_gpr() is false. */
IR3_FILE_NONGPR,
};
/* Map a register to a (file, offset) pair for alias checks.
 *
 * Only the flags of reg are consulted; the register number is taken from
 * num. The selected file is stored through *file and the offset within that
 * file is returned. Two registers overlap when they land in the same file
 * with intersecting offset ranges.
 *
 * Except in the separate half file, the offset is the (file-relative)
 * register number scaled by reg_elem_size(reg), i.e. it is in units of
 * half-regs, so that full registers can be compared against the half
 * registers they alias.
 */
static inline unsigned
ir3_reg_file_offset(const struct ir3_register *reg, unsigned num,
                    bool mergedregs, enum ir3_reg_file *file)
{
   const unsigned elem_size = reg_elem_size(reg);

   /* Non-GPRs are rebased to the start of the non-GPR range. */
   if (!is_reg_gpr(reg)) {
      *file = IR3_FILE_NONGPR;
      return (num - NONGPR_REG_START) * elem_size;
   }

   /* Shared registers are rebased to the start of the shared range. */
   if (reg->flags & IR3_REG_SHARED) {
      *file = IR3_FILE_SHARED;
      return (num - SHARED_REG_START) * elem_size;
   }

   /* Half registers only get a file of their own when the half and full
    * files are not merged; there the register number is used unscaled.
    */
   if (!mergedregs && (reg->flags & IR3_REG_HALF)) {
      *file = IR3_FILE_HALF;
      return num;
   }

   *file = IR3_FILE_FULL;
   return num * elem_size;
}
/* Same as above but in cases where we don't have a register. r48.x and above

View file

@ -362,24 +362,22 @@ struct ir3_postsched_deps_state {
* Note, this table is twice as big as the # of regs, to deal with
* half-precision regs. The approach differs depending on whether
* the half and full precision register files are "merged" (conflict,
* ie. a6xx+) in which case we consider each full precision dep
* ie. a6xx+) in which case we use "regs" for both full precision and half
* precision dependencies and consider each full precision dep
* as two half-precision dependencies, vs older separate (non-
* conflicting) in which case the first half of the table is used
* for full precision and 2nd half for half-precision.
* conflicting) in which case the separate "half_regs" table is used for
* half-precision deps. See ir3_reg_file_offset().
*/
struct ir3_postsched_node *regs[2 * 256];
unsigned dst_n[2 * 256];
struct ir3_postsched_node *regs[2 * GPR_REG_SIZE];
unsigned dst_n[2 * GPR_REG_SIZE];
struct ir3_postsched_node *half_regs[GPR_REG_SIZE];
unsigned half_dst_n[GPR_REG_SIZE];
struct ir3_postsched_node *shared_regs[2 * SHARED_REG_SIZE];
unsigned shared_dst_n[2 * SHARED_REG_SIZE];
struct ir3_postsched_node *nongpr_regs[2 * NONGPR_REG_SIZE];
unsigned nongpr_dst_n[2 * NONGPR_REG_SIZE];
};
/* bounds checking read/write accessors, since OoB access to stuff on
 * the stack is gonna cause a bad day.
 *
 * dep_reg(state, idx) asserts that idx is within (state)->regs and then
 * expands to an lvalue for (state)->regs[idx], so the same macro can be used
 * both to read an entry and to assign to it. It is built on a GNU statement
 * expression, and idx appears twice in the expansion, so it must not have
 * side effects.
 */
#define dep_reg(state, idx) \
*({ \
assert((idx) < ARRAY_SIZE((state)->regs)); \
&(state)->regs[(idx)]; \
})
static void
add_dep(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *before, struct ir3_postsched_node *after,
@ -399,10 +397,12 @@ add_dep(struct ir3_postsched_deps_state *state,
static void
add_single_reg_dep(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *node, unsigned num, int src_n,
struct ir3_postsched_node *node,
struct ir3_postsched_node **dep_ptr,
unsigned *dst_n_ptr, unsigned num, int src_n,
int dst_n)
{
struct ir3_postsched_node *dep = dep_reg(state, num);
struct ir3_postsched_node *dep = *dep_ptr;
unsigned d = 0;
if (src_n >= 0 && dep && state->direction == F) {
@ -419,8 +419,8 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state,
add_dep(state, dep, node, d);
if (src_n < 0) {
dep_reg(state, num) = node;
state->dst_n[num] = dst_n;
*dep_ptr = node;
*dst_n_ptr = dst_n;
}
}
@ -438,24 +438,36 @@ add_reg_dep(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *node, const struct ir3_register *reg,
unsigned num, int src_n, int dst_n)
{
/* NOTE(review): rendered diff with removed and added lines interleaved. The
 * "if (state->merged) ... else ..." block directly below appears to be the
 * removed implementation (its closing brace is not shown); the declarations
 * and the switch that follow appear to be the replacement.
 */
if (state->merged) {
/* Make sure that special registers like a0.x that are written as
 * half-registers don't alias random full registers by pretending that
 * they're full registers:
 */
if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
/* single conflict in half-reg space: */
add_single_reg_dep(state, node, num, src_n, dst_n);
} else {
/* two conflicts in half-reg space: */
add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n);
add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n);
}
} else {
if (reg->flags & IR3_REG_HALF)
num += ARRAY_SIZE(state->regs) / 2;
add_single_reg_dep(state, node, num, src_n, dst_n);
/* Ask ir3_reg_file_offset() which register file this register belongs to and
 * at which offset, then pick the dependency tables that track that file.
 */
struct ir3_postsched_node **regs;
unsigned *dst_n_ptr;
enum ir3_reg_file file;
unsigned size = reg_elem_size(reg);
unsigned offset = ir3_reg_file_offset(reg, num, state->merged, &file);
/* Each case checks that the slots about to be touched fit in the chosen
 * table before selecting it.
 * NOTE(review): there is no default case, so regs and dst_n_ptr would be
 * left uninitialized if file ever held an unexpected value.
 */
switch (file) {
case IR3_FILE_FULL:
assert(offset + size <= ARRAY_SIZE(state->regs));
regs = state->regs;
dst_n_ptr = state->dst_n;
break;
case IR3_FILE_HALF:
/* NOTE(review): only one slot is bounds-checked here while the loop below
 * runs size times; presumably size is 1 for registers that land in the half
 * file — confirm against reg_elem_size().
 */
assert(offset + 1 <= ARRAY_SIZE(state->half_regs));
regs = state->half_regs;
dst_n_ptr = state->half_dst_n;
break;
case IR3_FILE_SHARED:
assert(offset + size <= ARRAY_SIZE(state->shared_regs));
regs = state->shared_regs;
dst_n_ptr = state->shared_dst_n;
break;
case IR3_FILE_NONGPR:
assert(offset + size <= ARRAY_SIZE(state->nongpr_regs));
regs = state->nongpr_regs;
dst_n_ptr = state->nongpr_dst_n;
break;
}
/* Record one dependency per table slot covered by this register element. */
for (unsigned i = 0; i < size; i++)
add_single_reg_dep(state, node, &regs[offset + i], &dst_n_ptr[offset + i], num, src_n, dst_n);
}
static void