mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 12:50:10 +01:00
ir3: Rewrite postsched dependency handling
Split up the dependencies into multiple files, similar to RA, and calculate the file + index. This lets us remove the previous hack we had and lets us handle half shared registers correctly. The actual calculation of the file is moved into a shared ir3_reg_file_offset() function so that it can be reused in other places which have to check for overlapping registers. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
parent
dbeeec2570
commit
750e6843c0
2 changed files with 101 additions and 43 deletions
|
|
@ -1321,17 +1321,21 @@ dest_regs(struct ir3_instruction *instr)
|
|||
return util_last_bit(instr->dsts[0]->wrmask);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_reg_gpr(const struct ir3_register *reg)
|
||||
{
|
||||
if ((reg_num(reg) == REG_A0) || (reg->flags & IR3_REG_PREDICATE))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* is dst a normal temp register: */
|
||||
static inline bool
|
||||
is_dest_gpr(struct ir3_register *dst)
|
||||
is_dest_gpr(const struct ir3_register *dst)
|
||||
{
|
||||
if (dst->wrmask == 0)
|
||||
return false;
|
||||
if (reg_num(dst) == REG_A0)
|
||||
return false;
|
||||
if (dst->flags & IR3_REG_PREDICATE)
|
||||
return false;
|
||||
return true;
|
||||
return is_reg_gpr(dst);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
@ -1383,8 +1387,50 @@ writes_pred(struct ir3_instruction *instr)
|
|||
static inline bool
|
||||
is_reg_special(const struct ir3_register *reg)
|
||||
{
|
||||
return (reg->flags & (IR3_REG_SHARED | IR3_REG_PREDICATE) ||
|
||||
(reg_num(reg) == REG_A0));
|
||||
return (reg->flags & IR3_REG_SHARED) || !is_reg_gpr(reg);
|
||||
}
|
||||
|
||||
/* r0.x - r47.w are "normal" registers. r48.x - r55.w are shared registers.
|
||||
* Everything above those is a non-GPR register, like a0.x and p0.x, that isn't
|
||||
* assigned by RA.
|
||||
*/
|
||||
#define GPR_REG_SIZE (4 * 48)
|
||||
#define SHARED_REG_START GPR_REG_SIZE
|
||||
#define SHARED_REG_SIZE (4 * 8)
|
||||
#define NONGPR_REG_START (SHARED_REG_START + SHARED_REG_SIZE)
|
||||
#define NONGPR_REG_SIZE (4 * 8)
|
||||
|
||||
/* Register files distinguished by ir3_reg_file_offset() when checking
 * whether two registers alias.
 */
enum ir3_reg_file {
   /* Non-shared GPRs that are full, or any non-shared GPR when the half and
    * full files are merged.
    */
   IR3_FILE_FULL = 0,
   /* Non-shared half GPRs when the files are not merged. */
   IR3_FILE_HALF = 1,
   /* GPRs flagged IR3_REG_SHARED. */
   IR3_FILE_SHARED = 2,
   /* Registers for which is_reg_gpr() is false. */
   IR3_FILE_NONGPR = 3,
};
|
||||
|
||||
/* Return a file + offset that can be used for determining if two registers
|
||||
* alias. The register is only really used for its flags, the num is taken from
|
||||
* the parameter. Registers overlap if they are in the same file and have an
|
||||
* overlapping offset. The offset is multiplied by 2 for full registers to
|
||||
* handle aliasing half and full registers, that is it's in units of half-regs.
|
||||
*/
|
||||
static inline unsigned
|
||||
ir3_reg_file_offset(const struct ir3_register *reg, unsigned num,
|
||||
bool mergedregs, enum ir3_reg_file *file)
|
||||
{
|
||||
unsigned size = reg_elem_size(reg);
|
||||
if (!is_reg_gpr(reg)) {
|
||||
*file = IR3_FILE_NONGPR;
|
||||
return (num - NONGPR_REG_START) * size;
|
||||
} else if (reg->flags & IR3_REG_SHARED) {
|
||||
*file = IR3_FILE_SHARED;
|
||||
return (num - SHARED_REG_START) * size;
|
||||
} else if (mergedregs || !(reg->flags & IR3_REG_HALF)) {
|
||||
*file = IR3_FILE_FULL;
|
||||
return num * size;
|
||||
} else {
|
||||
*file = IR3_FILE_HALF;
|
||||
return num;
|
||||
}
|
||||
}
|
||||
|
||||
/* Same as above but in cases where we don't have a register. r48.x and above
|
||||
|
|
|
|||
|
|
@ -362,24 +362,22 @@ struct ir3_postsched_deps_state {
|
|||
* Note, this table is twice as big as the # of regs, to deal with
|
||||
* half-precision regs. The approach differs depending on whether
|
||||
* the half and full precision register files are "merged" (conflict,
|
||||
* ie. a6xx+) in which case we consider each full precision dep
|
||||
* ie. a6xx+) in which case we use "regs" for both full precision and half
|
||||
* precision dependencies and consider each full precision dep
|
||||
* as two half-precision dependencies, vs older separate (non-
|
||||
* conflicting) in which case the first half of the table is used
|
||||
* for full precision and 2nd half for half-precision.
|
||||
* conflicting) in which case the separate "half_regs" table is used for
|
||||
* half-precision deps. See ir3_reg_file_offset().
|
||||
*/
|
||||
struct ir3_postsched_node *regs[2 * 256];
|
||||
unsigned dst_n[2 * 256];
|
||||
struct ir3_postsched_node *regs[2 * GPR_REG_SIZE];
|
||||
unsigned dst_n[2 * GPR_REG_SIZE];
|
||||
struct ir3_postsched_node *half_regs[GPR_REG_SIZE];
|
||||
unsigned half_dst_n[GPR_REG_SIZE];
|
||||
struct ir3_postsched_node *shared_regs[2 * SHARED_REG_SIZE];
|
||||
unsigned shared_dst_n[2 * SHARED_REG_SIZE];
|
||||
struct ir3_postsched_node *nongpr_regs[2 * NONGPR_REG_SIZE];
|
||||
unsigned nongpr_dst_n[2 * NONGPR_REG_SIZE];
|
||||
};
|
||||
|
||||
/* bounds checking read/write accessors, since OoB access to stuff on
|
||||
* the stack is gonna cause a bad day.
|
||||
*/
|
||||
#define dep_reg(state, idx) \
|
||||
*({ \
|
||||
assert((idx) < ARRAY_SIZE((state)->regs)); \
|
||||
&(state)->regs[(idx)]; \
|
||||
})
|
||||
|
||||
static void
|
||||
add_dep(struct ir3_postsched_deps_state *state,
|
||||
struct ir3_postsched_node *before, struct ir3_postsched_node *after,
|
||||
|
|
@ -399,10 +397,12 @@ add_dep(struct ir3_postsched_deps_state *state,
|
|||
|
||||
static void
|
||||
add_single_reg_dep(struct ir3_postsched_deps_state *state,
|
||||
struct ir3_postsched_node *node, unsigned num, int src_n,
|
||||
struct ir3_postsched_node *node,
|
||||
struct ir3_postsched_node **dep_ptr,
|
||||
unsigned *dst_n_ptr, unsigned num, int src_n,
|
||||
int dst_n)
|
||||
{
|
||||
struct ir3_postsched_node *dep = dep_reg(state, num);
|
||||
struct ir3_postsched_node *dep = *dep_ptr;
|
||||
|
||||
unsigned d = 0;
|
||||
if (src_n >= 0 && dep && state->direction == F) {
|
||||
|
|
@ -419,8 +419,8 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state,
|
|||
|
||||
add_dep(state, dep, node, d);
|
||||
if (src_n < 0) {
|
||||
dep_reg(state, num) = node;
|
||||
state->dst_n[num] = dst_n;
|
||||
*dep_ptr = node;
|
||||
*dst_n_ptr = dst_n;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -438,24 +438,36 @@ add_reg_dep(struct ir3_postsched_deps_state *state,
|
|||
struct ir3_postsched_node *node, const struct ir3_register *reg,
|
||||
unsigned num, int src_n, int dst_n)
|
||||
{
|
||||
if (state->merged) {
|
||||
/* Make sure that special registers like a0.x that are written as
|
||||
* half-registers don't alias random full registers by pretending that
|
||||
* they're full registers:
|
||||
*/
|
||||
if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
|
||||
/* single conflict in half-reg space: */
|
||||
add_single_reg_dep(state, node, num, src_n, dst_n);
|
||||
} else {
|
||||
/* two conflicts in half-reg space: */
|
||||
add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n);
|
||||
add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n);
|
||||
}
|
||||
} else {
|
||||
if (reg->flags & IR3_REG_HALF)
|
||||
num += ARRAY_SIZE(state->regs) / 2;
|
||||
add_single_reg_dep(state, node, num, src_n, dst_n);
|
||||
struct ir3_postsched_node **regs;
|
||||
unsigned *dst_n_ptr;
|
||||
enum ir3_reg_file file;
|
||||
unsigned size = reg_elem_size(reg);
|
||||
unsigned offset = ir3_reg_file_offset(reg, num, state->merged, &file);
|
||||
switch (file) {
|
||||
case IR3_FILE_FULL:
|
||||
assert(offset + size <= ARRAY_SIZE(state->regs));
|
||||
regs = state->regs;
|
||||
dst_n_ptr = state->dst_n;
|
||||
break;
|
||||
case IR3_FILE_HALF:
|
||||
assert(offset + 1 <= ARRAY_SIZE(state->half_regs));
|
||||
regs = state->half_regs;
|
||||
dst_n_ptr = state->half_dst_n;
|
||||
break;
|
||||
case IR3_FILE_SHARED:
|
||||
assert(offset + size <= ARRAY_SIZE(state->shared_regs));
|
||||
regs = state->shared_regs;
|
||||
dst_n_ptr = state->shared_dst_n;
|
||||
break;
|
||||
case IR3_FILE_NONGPR:
|
||||
assert(offset + size <= ARRAY_SIZE(state->nongpr_regs));
|
||||
regs = state->nongpr_regs;
|
||||
dst_n_ptr = state->nongpr_dst_n;
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < size; i++)
|
||||
add_single_reg_dep(state, node, &regs[offset + i], &dst_n_ptr[offset + i], num, src_n, dst_n);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue