agx: Coalesce collects when possible

Track collects and use them as affinities when choosing registers. On
glmark2:

total instructions in shared programs: 5498 -> 5388 (-2.00%)
instructions in affected programs: 2748 -> 2638 (-4.00%)
helped: 31
HURT: 0
helped stats (abs) min: 1.0 max: 12.0 x̄: 3.55 x̃: 3
helped stats (rel) min: 0.09% max: 57.14% x̄: 10.58% x̃: 5.97%
95% mean confidence interval for instructions value: -4.61 -2.49
95% mean confidence interval for instructions %-change: -15.16% -6.00%
Instructions are helped.

total bytes in shared programs: 37280 -> 36620 (-1.77%)
bytes in affected programs: 18880 -> 18220 (-3.50%)
helped: 31
HURT: 0
helped stats (abs) min: 6.0 max: 72.0 x̄: 21.29 x̃: 18
helped stats (rel) min: 0.07% max: 48.98% x̄: 9.16% x̃: 5.17%
95% mean confidence interval for bytes value: -27.64 -14.94
95% mean confidence interval for bytes %-change: -13.03% -5.29%
Bytes are helped.

total halfregs in shared programs: 1267 -> 1279 (0.95%)
halfregs in affected programs: 37 -> 49 (32.43%)
helped: 0
HURT: 9
HURT stats (abs)   min: 1.0 max: 2.0 x̄: 1.33 x̃: 1
HURT stats (rel)   min: 16.67% max: 66.67% x̄: 35.58% x̃: 28.57%
95% mean confidence interval for halfregs value: 0.95 1.72
95% mean confidence interval for halfregs %-change: 21.50% 49.67%
Halfregs are HURT.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19590>
This commit is contained in:
Alyssa Rosenzweig 2022-10-26 11:23:51 -04:00 committed by Marge Bot
parent 4cc2427ad6
commit 023f27fada

View file

@ -34,6 +34,9 @@ struct ra_ctx {
BITSET_WORD *visited;
BITSET_WORD *used_regs;
/* For affinities */
agx_instr **src_to_collect;
/* Maximum number of registers that RA is allowed to use */
unsigned bound;
};
@ -139,6 +142,88 @@ assign_regs(struct ra_ctx *rctx, agx_index v, unsigned reg)
BITSET_SET_RANGE(rctx->used_regs, reg, end);
}
/*
 * Given a collect and one of its sources that already has a register
 * assigned, compute the register the collect's destination would need to
 * start at for that source to be in-place (i.e. destination base =
 * source register minus the source's offset within the vector).
 *
 * Returns ~0 when the subtraction would underflow, meaning no such base
 * exists; callers treat that as "no affinity from this source".
 */
static unsigned
affinity_base_of_collect(struct ra_ctx *rctx, agx_instr *collect, unsigned src)
{
   agx_index operand = collect->src[src];
   unsigned assigned = rctx->ssa_to_reg[operand.value];
   unsigned offset_in_vec = src * agx_size_align_16(operand.size);

   /* Underflow means the source sits too low in the file; flag with ~0,
    * which compares >= any valid bound in the callers.
    */
   return (assigned >= offset_in_vec) ? (assigned - offset_in_vec) : ~0;
}
/*
 * Choose a base register for destination d of instruction I.
 *
 * Preference order:
 *   1. If I is a collect, reuse a base that would make one of its
 *      already-assigned sources in-place (coalescing the copy).
 *   2. If this value feeds a collect, pick the register that lines it up
 *      contiguously with another already-assigned source of that collect.
 *   3. Otherwise, fall back to any free contiguous range.
 *
 * Only free ranges within rctx->bound are returned, so the result is
 * always a legal allocation; the affinity steps are purely best-effort.
 */
static unsigned
pick_regs(struct ra_ctx *rctx, agx_instr *I, unsigned d)
{
   agx_index idx = I->dest[d];
   assert(idx.type == AGX_INDEX_NORMAL);

   /* count = total registers written for this dest; align = required
    * alignment in 16-bit register units.
    */
   unsigned count = agx_write_registers(I, d);
   unsigned align = agx_size_align_16(idx.size);

   /* Try to allocate collects compatibly with their sources */
   if (I->op == AGX_OPCODE_COLLECT) {
      agx_foreach_ssa_src(I, s) {
         assert(BITSET_TEST(rctx->visited, I->src[s].value) &&
                "registers assigned in an order compatible with dominance "
                "and this is not a phi node, so we have assigned a register");

         /* Base the whole collect so source s is already in place. */
         unsigned base = affinity_base_of_collect(rctx, I, s);

         /* ~0 from an impossible base trips the first check; the second
          * keeps the full destination range inside the register file.
          */
         if (base >= rctx->bound || (base + count) > rctx->bound)
            continue;

         if (!BITSET_TEST_RANGE(rctx->used_regs, base, base + count - 1))
            return base;
      }
   }

   /* Try to allocate sources of collects contiguously */
   if (rctx->src_to_collect[idx.value] != NULL) {
      agx_instr *collect = rctx->src_to_collect[idx.value];

      /* A scalar source writes exactly `align` registers, so count ==
       * align must hold here.
       */
      assert(count == align && "collect sources are scalar");

      /* Find our offset in the collect. If our source is repeated in the
       * collect, this may not be unique. We arbitrarily choose the first.
       */
      unsigned our_source = ~0;
      agx_foreach_ssa_src(collect, s) {
         if (agx_is_equiv(collect->src[s], idx)) {
            our_source = s;
            break;
         }
      }

      assert(our_source < collect->nr_srcs && "source must be in the collect");

      /* See if we can allocate compatibly with any source of the collect */
      agx_foreach_ssa_src(collect, s) {
         /* Only sources already assigned a register give an affinity. */
         if (!BITSET_TEST(rctx->visited, collect->src[s].value))
            continue;

         /* Determine where the collect should start relative to the source */
         unsigned base = affinity_base_of_collect(rctx, collect, s);

         /* Also rejects the ~0 "impossible" sentinel. */
         if (base >= rctx->bound)
            continue;

         /* Our own slot within the collect, relative to that base. */
         unsigned our_reg = base + (our_source * align);

         /* Don't allocate past the end of the register file */
         if ((our_reg + align) > rctx->bound)
            continue;

         /* If those registers are free, then choose them */
         if (!BITSET_TEST_RANGE(rctx->used_regs, our_reg, our_reg + align - 1))
            return our_reg;
      }
   }

   /* Default to any contiguous sequence of registers */
   return find_regs(rctx->used_regs, count, align, rctx->bound);
}
/** Assign registers to SSA values in a block. */
static void
@ -201,11 +286,7 @@ agx_ra_assign_local(struct ra_ctx *rctx)
* because of the SSA form.
*/
agx_foreach_ssa_dest(I, d) {
unsigned count = agx_write_registers(I, d);
unsigned align = agx_size_align_16(I->dest[d].size);
assign_regs(rctx, I->dest[d],
find_regs(used_regs, count, align, rctx->bound));
assign_regs(rctx, I->dest[d], pick_regs(rctx, I, d));
}
}
@ -297,9 +378,17 @@ agx_ra(agx_context *ctx)
agx_compute_liveness(ctx);
uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));
agx_instr **src_to_collect = calloc(ctx->alloc, sizeof(agx_instr *));
BITSET_WORD *visited = calloc(BITSET_WORDS(ctx->alloc), sizeof(BITSET_WORD));
agx_foreach_instr_global(ctx, I) {
/* Record collects so we can coalesce when assigning */
if (I->op == AGX_OPCODE_COLLECT) {
agx_foreach_ssa_src(I, s) {
src_to_collect[I->src[s].value] = I;
}
}
agx_foreach_ssa_dest(I, d) {
unsigned v = I->dest[d].value;
assert(ncomps[v] == 0 && "broken SSA");
@ -315,6 +404,7 @@ agx_ra(agx_context *ctx)
.shader = ctx,
.block = block,
.ssa_to_reg = ssa_to_reg,
.src_to_collect = src_to_collect,
.ncomps = ncomps,
.visited = visited,
.bound = AGX_NUM_REGS
@ -436,6 +526,7 @@ agx_ra(agx_context *ctx)
}
}
free(src_to_collect);
free(ssa_to_reg);
free(ncomps);
free(visited);