diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index e563ff29f87..b20e327d2e2 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1321,17 +1321,21 @@ dest_regs(struct ir3_instruction *instr) return util_last_bit(instr->dsts[0]->wrmask); } +static inline bool +is_reg_gpr(const struct ir3_register *reg) +{ + if ((reg_num(reg) == REG_A0) || (reg->flags & IR3_REG_PREDICATE)) + return false; + return true; +} + /* is dst a normal temp register: */ static inline bool -is_dest_gpr(struct ir3_register *dst) +is_dest_gpr(const struct ir3_register *dst) { if (dst->wrmask == 0) return false; - if (reg_num(dst) == REG_A0) - return false; - if (dst->flags & IR3_REG_PREDICATE) - return false; - return true; + return is_reg_gpr(dst); } static inline bool @@ -1383,8 +1387,50 @@ writes_pred(struct ir3_instruction *instr) static inline bool is_reg_special(const struct ir3_register *reg) { - return (reg->flags & (IR3_REG_SHARED | IR3_REG_PREDICATE) || - (reg_num(reg) == REG_A0)); + return (reg->flags & IR3_REG_SHARED) || !is_reg_gpr(reg); +} + +/* r0.x - r47.w are "normal" registers. r48.x - r55.w are shared registers. + * Everything above those are non-GPR registers like a0.x and p0.x that aren't + * assigned by RA. + */ +#define GPR_REG_SIZE (4 * 48) +#define SHARED_REG_START GPR_REG_SIZE +#define SHARED_REG_SIZE (4 * 8) +#define NONGPR_REG_START (SHARED_REG_START + SHARED_REG_SIZE) +#define NONGPR_REG_SIZE (4 * 8) + +enum ir3_reg_file { + IR3_FILE_FULL, + IR3_FILE_HALF, + IR3_FILE_SHARED, + IR3_FILE_NONGPR, +}; + +/* Return a file + offset that can be used for determining if two registers + * alias. The register is only really used for its flags, the num is taken from + * the parameter. Registers overlap if they are in the same file and have an + * overlapping offset. The offset is multiplied by 2 for full registers to + * handle aliasing half and full registers, that is it's in units of half-regs. 
+ */ +static inline unsigned +ir3_reg_file_offset(const struct ir3_register *reg, unsigned num, + bool mergedregs, enum ir3_reg_file *file) +{ + unsigned size = reg_elem_size(reg); + if (!is_reg_gpr(reg)) { + *file = IR3_FILE_NONGPR; + return (num - NONGPR_REG_START) * size; + } else if (reg->flags & IR3_REG_SHARED) { + *file = IR3_FILE_SHARED; + return (num - SHARED_REG_START) * size; + } else if (mergedregs || !(reg->flags & IR3_REG_HALF)) { + *file = IR3_FILE_FULL; + return num * size; + } else { + *file = IR3_FILE_HALF; + return num; + } } /* Same as above but in cases where we don't have a register. r48.x and above diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c index a80d17a20fd..c56829e9025 100644 --- a/src/freedreno/ir3/ir3_postsched.c +++ b/src/freedreno/ir3/ir3_postsched.c @@ -362,24 +362,22 @@ struct ir3_postsched_deps_state { * Note, this table is twice as big as the # of regs, to deal with * half-precision regs. The approach differs depending on whether * the half and full precision register files are "merged" (conflict, - * ie. a6xx+) in which case we consider each full precision dep + * ie. a6xx+) in which case we use "regs" for both full precision and half + * precision dependencies and consider each full precision dep * as two half-precision dependencies, vs older separate (non- - * conflicting) in which case the first half of the table is used - * for full precision and 2nd half for half-precision. + * conflicting) in which case the separate "half_regs" table is used for + * half-precision deps. See ir3_reg_file_offset(). 
*/ - struct ir3_postsched_node *regs[2 * 256]; - unsigned dst_n[2 * 256]; + struct ir3_postsched_node *regs[2 * GPR_REG_SIZE]; + unsigned dst_n[2 * GPR_REG_SIZE]; + struct ir3_postsched_node *half_regs[GPR_REG_SIZE]; + unsigned half_dst_n[GPR_REG_SIZE]; + struct ir3_postsched_node *shared_regs[2 * SHARED_REG_SIZE]; + unsigned shared_dst_n[2 * SHARED_REG_SIZE]; + struct ir3_postsched_node *nongpr_regs[2 * NONGPR_REG_SIZE]; + unsigned nongpr_dst_n[2 * NONGPR_REG_SIZE]; }; -/* bounds checking read/write accessors, since OoB access to stuff on - * the stack is gonna cause a bad day. - */ -#define dep_reg(state, idx) \ - *({ \ - assert((idx) < ARRAY_SIZE((state)->regs)); \ - &(state)->regs[(idx)]; \ - }) - static void add_dep(struct ir3_postsched_deps_state *state, struct ir3_postsched_node *before, struct ir3_postsched_node *after, @@ -399,10 +397,12 @@ add_dep(struct ir3_postsched_deps_state *state, static void add_single_reg_dep(struct ir3_postsched_deps_state *state, - struct ir3_postsched_node *node, unsigned num, int src_n, + struct ir3_postsched_node *node, + struct ir3_postsched_node **dep_ptr, + unsigned *dst_n_ptr, unsigned num, int src_n, int dst_n) { - struct ir3_postsched_node *dep = dep_reg(state, num); + struct ir3_postsched_node *dep = *dep_ptr; unsigned d = 0; if (src_n >= 0 && dep && state->direction == F) { @@ -419,8 +419,8 @@ add_single_reg_dep(struct ir3_postsched_deps_state *state, add_dep(state, dep, node, d); if (src_n < 0) { - dep_reg(state, num) = node; - state->dst_n[num] = dst_n; + *dep_ptr = node; + *dst_n_ptr = dst_n; } } @@ -438,24 +438,36 @@ add_reg_dep(struct ir3_postsched_deps_state *state, struct ir3_postsched_node *node, const struct ir3_register *reg, unsigned num, int src_n, int dst_n) { - if (state->merged) { - /* Make sure that special registers like a0.x that are written as - * half-registers don't alias random full registers by pretending that - * they're full registers: - */ - if ((reg->flags & IR3_REG_HALF) && 
!is_reg_special(reg)) { - /* single conflict in half-reg space: */ - add_single_reg_dep(state, node, num, src_n, dst_n); - } else { - /* two conflicts in half-reg space: */ - add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n); - add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n); - } - } else { - if (reg->flags & IR3_REG_HALF) - num += ARRAY_SIZE(state->regs) / 2; - add_single_reg_dep(state, node, num, src_n, dst_n); + struct ir3_postsched_node **regs; + unsigned *dst_n_ptr; + enum ir3_reg_file file; + unsigned size = reg_elem_size(reg); + unsigned offset = ir3_reg_file_offset(reg, num, state->merged, &file); + switch (file) { + case IR3_FILE_FULL: + assert(offset + size <= ARRAY_SIZE(state->regs)); + regs = state->regs; + dst_n_ptr = state->dst_n; + break; + case IR3_FILE_HALF: + assert(offset + 1 <= ARRAY_SIZE(state->half_regs)); + regs = state->half_regs; + dst_n_ptr = state->half_dst_n; + break; + case IR3_FILE_SHARED: + assert(offset + size <= ARRAY_SIZE(state->shared_regs)); + regs = state->shared_regs; + dst_n_ptr = state->shared_dst_n; + break; + case IR3_FILE_NONGPR: + assert(offset + size <= ARRAY_SIZE(state->nongpr_regs)); + regs = state->nongpr_regs; + dst_n_ptr = state->nongpr_dst_n; + break; } + + for (unsigned i = 0; i < size; i++) + add_single_reg_dep(state, node, &regs[offset + i], &dst_n_ptr[offset + i], num, src_n, dst_n); } static void