mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
ir3: Rewrite postsched dependency handling
Split the dependency tracking up by register file, similar to RA, and calculate a file + index for each register. This lets us remove the previous hack we had and lets us handle half shared registers correctly. The actual calculation of the file is moved into a shared ir3_reg_file_offset() function so that it can be reused in other places that have to check for overlapping registers. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
parent
dbeeec2570
commit
750e6843c0
2 changed files with 101 additions and 43 deletions
|
|
@ -1321,17 +1321,21 @@ dest_regs(struct ir3_instruction *instr)
|
||||||
return util_last_bit(instr->dsts[0]->wrmask);
|
return util_last_bit(instr->dsts[0]->wrmask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_reg_gpr(const struct ir3_register *reg)
|
||||||
|
{
|
||||||
|
if ((reg_num(reg) == REG_A0) || (reg->flags & IR3_REG_PREDICATE))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* is dst a normal temp register: */
|
/* is dst a normal temp register: */
|
||||||
static inline bool
|
static inline bool
|
||||||
is_dest_gpr(struct ir3_register *dst)
|
is_dest_gpr(const struct ir3_register *dst)
|
||||||
{
|
{
|
||||||
if (dst->wrmask == 0)
|
if (dst->wrmask == 0)
|
||||||
return false;
|
return false;
|
||||||
if (reg_num(dst) == REG_A0)
|
return is_reg_gpr(dst);
|
||||||
return false;
|
|
||||||
if (dst->flags & IR3_REG_PREDICATE)
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
|
|
@ -1383,8 +1387,50 @@ writes_pred(struct ir3_instruction *instr)
|
||||||
static inline bool
|
static inline bool
|
||||||
is_reg_special(const struct ir3_register *reg)
|
is_reg_special(const struct ir3_register *reg)
|
||||||
{
|
{
|
||||||
return (reg->flags & (IR3_REG_SHARED | IR3_REG_PREDICATE) ||
|
return (reg->flags & IR3_REG_SHARED) || !is_reg_gpr(reg);
|
||||||
(reg_num(reg) == REG_A0));
|
}
|
||||||
|
|
||||||
|
/* r0.x - r47.w are "normal" registers. r48.x - r55.w are shared registers.
|
||||||
|
* Everything above those are non-GPR registers like a0.x and p0.x that aren't
|
||||||
|
* assigned by RA.
|
||||||
|
*/
|
||||||
|
#define GPR_REG_SIZE (4 * 48)
|
||||||
|
#define SHARED_REG_START GPR_REG_SIZE
|
||||||
|
#define SHARED_REG_SIZE (4 * 8)
|
||||||
|
#define NONGPR_REG_START (SHARED_REG_START + SHARED_REG_SIZE)
|
||||||
|
#define NONGPR_REG_SIZE (4 * 8)
|
||||||
|
|
||||||
|
enum ir3_reg_file {
|
||||||
|
IR3_FILE_FULL,
|
||||||
|
IR3_FILE_HALF,
|
||||||
|
IR3_FILE_SHARED,
|
||||||
|
IR3_FILE_NONGPR,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Return a file + offset that can be used for determining if two registers
|
||||||
|
* alias. The register is only really used for its flags, the num is taken from
|
||||||
|
* the parameter. Registers overlap if they are in the same file and have an
|
||||||
|
* overlapping offset. The offset is multiplied by 2 for full registers to
|
||||||
|
* handle aliasing half and full registers, that is it's in units of half-regs.
|
||||||
|
*/
|
||||||
|
static inline unsigned
|
||||||
|
ir3_reg_file_offset(const struct ir3_register *reg, unsigned num,
|
||||||
|
bool mergedregs, enum ir3_reg_file *file)
|
||||||
|
{
|
||||||
|
unsigned size = reg_elem_size(reg);
|
||||||
|
if (!is_reg_gpr(reg)) {
|
||||||
|
*file = IR3_FILE_NONGPR;
|
||||||
|
return (num - NONGPR_REG_START) * size;
|
||||||
|
} else if (reg->flags & IR3_REG_SHARED) {
|
||||||
|
*file = IR3_FILE_SHARED;
|
||||||
|
return (num - SHARED_REG_START) * size;
|
||||||
|
} else if (mergedregs || !(reg->flags & IR3_REG_HALF)) {
|
||||||
|
*file = IR3_FILE_FULL;
|
||||||
|
return num * size;
|
||||||
|
} else {
|
||||||
|
*file = IR3_FILE_HALF;
|
||||||
|
return num;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Same as above but in cases where we don't have a register. r48.x and above
|
/* Same as above but in cases where we don't have a register. r48.x and above
|
||||||
|
|
|
||||||
|
|
@ -362,24 +362,22 @@ struct ir3_postsched_deps_state {
|
||||||
* Note, this table is twice as big as the # of regs, to deal with
|
* Note, this table is twice as big as the # of regs, to deal with
|
||||||
* half-precision regs. The approach differs depending on whether
|
* half-precision regs. The approach differs depending on whether
|
||||||
* the half and full precision register files are "merged" (conflict,
|
* the half and full precision register files are "merged" (conflict,
|
||||||
* ie. a6xx+) in which case we consider each full precision dep
|
* ie. a6xx+) in which case we use "regs" for both full precision and half
|
||||||
|
* precision dependencies and consider each full precision dep
|
||||||
* as two half-precision dependencies, vs older separate (non-
|
* as two half-precision dependencies, vs older separate (non-
|
||||||
* conflicting) in which case the first half of the table is used
|
* conflicting) in which case the separate "half_regs" table is used for
|
||||||
* for full precision and 2nd half for half-precision.
|
* half-precision deps. See ir3_reg_file_offset().
|
||||||
*/
|
*/
|
||||||
struct ir3_postsched_node *regs[2 * 256];
|
struct ir3_postsched_node *regs[2 * GPR_REG_SIZE];
|
||||||
unsigned dst_n[2 * 256];
|
unsigned dst_n[2 * GPR_REG_SIZE];
|
||||||
|
struct ir3_postsched_node *half_regs[GPR_REG_SIZE];
|
||||||
|
unsigned half_dst_n[GPR_REG_SIZE];
|
||||||
|
struct ir3_postsched_node *shared_regs[2 * SHARED_REG_SIZE];
|
||||||
|
unsigned shared_dst_n[2 * SHARED_REG_SIZE];
|
||||||
|
struct ir3_postsched_node *nongpr_regs[2 * NONGPR_REG_SIZE];
|
||||||
|
unsigned nongpr_dst_n[2 * NONGPR_REG_SIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
/* bounds checking read/write accessors, since OoB access to stuff on
|
|
||||||
* the stack is gonna cause a bad day.
|
|
||||||
*/
|
|
||||||
#define dep_reg(state, idx) \
|
|
||||||
*({ \
|
|
||||||
assert((idx) < ARRAY_SIZE((state)->regs)); \
|
|
||||||
&(state)->regs[(idx)]; \
|
|
||||||
})
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
add_dep(struct ir3_postsched_deps_state *state,
|
add_dep(struct ir3_postsched_deps_state *state,
|
||||||
struct ir3_postsched_node *before, struct ir3_postsched_node *after,
|
struct ir3_postsched_node *before, struct ir3_postsched_node *after,
|
||||||
|
|
@ -399,10 +397,12 @@ add_dep(struct ir3_postsched_deps_state *state,
|
||||||
|
|
||||||
static void
|
static void
|
||||||
add_single_reg_dep(struct ir3_postsched_deps_state *state,
|
add_single_reg_dep(struct ir3_postsched_deps_state *state,
|
||||||
struct ir3_postsched_node *node, unsigned num, int src_n,
|
struct ir3_postsched_node *node,
|
||||||
|
struct ir3_postsched_node **dep_ptr,
|
||||||
|
unsigned *dst_n_ptr, unsigned num, int src_n,
|
||||||
int dst_n)
|
int dst_n)
|
||||||
{
|
{
|
||||||
struct ir3_postsched_node *dep = dep_reg(state, num);
|
struct ir3_postsched_node *dep = *dep_ptr;
|
||||||
|
|
||||||
unsigned d = 0;
|
unsigned d = 0;
|
||||||
if (src_n >= 0 && dep && state->direction == F) {
|
if (src_n >= 0 && dep && state->direction == F) {
|
||||||
|
|
@ -419,8 +419,8 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state,
|
||||||
|
|
||||||
add_dep(state, dep, node, d);
|
add_dep(state, dep, node, d);
|
||||||
if (src_n < 0) {
|
if (src_n < 0) {
|
||||||
dep_reg(state, num) = node;
|
*dep_ptr = node;
|
||||||
state->dst_n[num] = dst_n;
|
*dst_n_ptr = dst_n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -438,24 +438,36 @@ add_reg_dep(struct ir3_postsched_deps_state *state,
|
||||||
struct ir3_postsched_node *node, const struct ir3_register *reg,
|
struct ir3_postsched_node *node, const struct ir3_register *reg,
|
||||||
unsigned num, int src_n, int dst_n)
|
unsigned num, int src_n, int dst_n)
|
||||||
{
|
{
|
||||||
if (state->merged) {
|
struct ir3_postsched_node **regs;
|
||||||
/* Make sure that special registers like a0.x that are written as
|
unsigned *dst_n_ptr;
|
||||||
* half-registers don't alias random full registers by pretending that
|
enum ir3_reg_file file;
|
||||||
* they're full registers:
|
unsigned size = reg_elem_size(reg);
|
||||||
*/
|
unsigned offset = ir3_reg_file_offset(reg, num, state->merged, &file);
|
||||||
if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
|
switch (file) {
|
||||||
/* single conflict in half-reg space: */
|
case IR3_FILE_FULL:
|
||||||
add_single_reg_dep(state, node, num, src_n, dst_n);
|
assert(offset + size <= ARRAY_SIZE(state->regs));
|
||||||
} else {
|
regs = state->regs;
|
||||||
/* two conflicts in half-reg space: */
|
dst_n_ptr = state->dst_n;
|
||||||
add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n);
|
break;
|
||||||
add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n);
|
case IR3_FILE_HALF:
|
||||||
}
|
assert(offset + 1 <= ARRAY_SIZE(state->half_regs));
|
||||||
} else {
|
regs = state->half_regs;
|
||||||
if (reg->flags & IR3_REG_HALF)
|
dst_n_ptr = state->half_dst_n;
|
||||||
num += ARRAY_SIZE(state->regs) / 2;
|
break;
|
||||||
add_single_reg_dep(state, node, num, src_n, dst_n);
|
case IR3_FILE_SHARED:
|
||||||
|
assert(offset + size <= ARRAY_SIZE(state->shared_regs));
|
||||||
|
regs = state->shared_regs;
|
||||||
|
dst_n_ptr = state->shared_dst_n;
|
||||||
|
break;
|
||||||
|
case IR3_FILE_NONGPR:
|
||||||
|
assert(offset + size <= ARRAY_SIZE(state->nongpr_regs));
|
||||||
|
regs = state->nongpr_regs;
|
||||||
|
dst_n_ptr = state->nongpr_dst_n;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < size; i++)
|
||||||
|
add_single_reg_dep(state, node, &regs[offset + i], &dst_n_ptr[offset + i], num, src_n, dst_n);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue