mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
agx: Coalesce collects when possible
Track collects and use them as affinities when choosing registers. On glmark2: total instructions in shared programs: 5498 -> 5388 (-2.00%) instructions in affected programs: 2748 -> 2638 (-4.00%) helped: 31 HURT: 0 helped stats (abs) min: 1.0 max: 12.0 x̄: 3.55 x̃: 3 helped stats (rel) min: 0.09% max: 57.14% x̄: 10.58% x̃: 5.97% 95% mean confidence interval for instructions value: -4.61 -2.49 95% mean confidence interval for instructions %-change: -15.16% -6.00% Instructions are helped. total bytes in shared programs: 37280 -> 36620 (-1.77%) bytes in affected programs: 18880 -> 18220 (-3.50%) helped: 31 HURT: 0 helped stats (abs) min: 6.0 max: 72.0 x̄: 21.29 x̃: 18 helped stats (rel) min: 0.07% max: 48.98% x̄: 9.16% x̃: 5.17% 95% mean confidence interval for bytes value: -27.64 -14.94 95% mean confidence interval for bytes %-change: -13.03% -5.29% Bytes are helped. total halfregs in shared programs: 1267 -> 1279 (0.95%) halfregs in affected programs: 37 -> 49 (32.43%) helped: 0 HURT: 9 HURT stats (abs) min: 1.0 max: 2.0 x̄: 1.33 x̃: 1 HURT stats (rel) min: 16.67% max: 66.67% x̄: 35.58% x̃: 28.57% 95% mean confidence interval for halfregs value: 0.95 1.72 95% mean confidence interval for halfregs %-change: 21.50% 49.67% Halfregs are HURT. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19590>
This commit is contained in:
parent
4cc2427ad6
commit
023f27fada
1 changed files with 96 additions and 5 deletions
|
|
@ -34,6 +34,9 @@ struct ra_ctx {
|
|||
BITSET_WORD *visited;
|
||||
BITSET_WORD *used_regs;
|
||||
|
||||
/* For affinities */
|
||||
agx_instr **src_to_collect;
|
||||
|
||||
/* Maximum number of registers that RA is allowed to use */
|
||||
unsigned bound;
|
||||
};
|
||||
|
|
@ -139,6 +142,88 @@ assign_regs(struct ra_ctx *rctx, agx_index v, unsigned reg)
|
|||
BITSET_SET_RANGE(rctx->used_regs, reg, end);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
affinity_base_of_collect(struct ra_ctx *rctx, agx_instr *collect, unsigned src)
|
||||
{
|
||||
unsigned src_reg = rctx->ssa_to_reg[collect->src[src].value];
|
||||
unsigned src_offset = src * agx_size_align_16(collect->src[src].size);
|
||||
|
||||
if (src_reg >= src_offset)
|
||||
return src_reg - src_offset;
|
||||
else
|
||||
return ~0;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
pick_regs(struct ra_ctx *rctx, agx_instr *I, unsigned d)
|
||||
{
|
||||
agx_index idx = I->dest[d];
|
||||
assert(idx.type == AGX_INDEX_NORMAL);
|
||||
|
||||
unsigned count = agx_write_registers(I, d);
|
||||
unsigned align = agx_size_align_16(idx.size);
|
||||
|
||||
/* Try to allocate collects compatibly with their sources */
|
||||
if (I->op == AGX_OPCODE_COLLECT) {
|
||||
agx_foreach_ssa_src(I, s) {
|
||||
assert(BITSET_TEST(rctx->visited, I->src[s].value) &&
|
||||
"registers assigned in an order compatible with dominance "
|
||||
"and this is not a phi node, so we have assigned a register");
|
||||
|
||||
unsigned base = affinity_base_of_collect(rctx, I, s);
|
||||
if (base >= rctx->bound || (base + count) > rctx->bound)
|
||||
continue;
|
||||
|
||||
if (!BITSET_TEST_RANGE(rctx->used_regs, base, base + count - 1))
|
||||
return base;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to allocate sources of collects contiguously */
|
||||
if (rctx->src_to_collect[idx.value] != NULL) {
|
||||
agx_instr *collect = rctx->src_to_collect[idx.value];
|
||||
|
||||
assert(count == align && "collect sources are scalar");
|
||||
|
||||
/* Find our offset in the collect. If our source is repeated in the
|
||||
* collect, this may not be unique. We arbitrarily choose the first.
|
||||
*/
|
||||
unsigned our_source = ~0;
|
||||
agx_foreach_ssa_src(collect, s) {
|
||||
if (agx_is_equiv(collect->src[s], idx)) {
|
||||
our_source = s;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(our_source < collect->nr_srcs && "source must be in the collect");
|
||||
|
||||
/* See if we can allocate compatibly with any source of the collect */
|
||||
agx_foreach_ssa_src(collect, s) {
|
||||
if (!BITSET_TEST(rctx->visited, collect->src[s].value))
|
||||
continue;
|
||||
|
||||
/* Determine where the collect should start relative to the source */
|
||||
unsigned base = affinity_base_of_collect(rctx, collect, s);
|
||||
if (base >= rctx->bound)
|
||||
continue;
|
||||
|
||||
unsigned our_reg = base + (our_source * align);
|
||||
|
||||
/* Don't allocate past the end of the register file */
|
||||
if ((our_reg + align) > rctx->bound)
|
||||
continue;
|
||||
|
||||
/* If those registers are free, then choose them */
|
||||
if (!BITSET_TEST_RANGE(rctx->used_regs, our_reg, our_reg + align - 1))
|
||||
return our_reg;
|
||||
}
|
||||
}
|
||||
|
||||
/* Default to any contiguous sequence of registers */
|
||||
return find_regs(rctx->used_regs, count, align, rctx->bound);
|
||||
}
|
||||
|
||||
/** Assign registers to SSA values in a block. */
|
||||
|
||||
static void
|
||||
|
|
@ -201,11 +286,7 @@ agx_ra_assign_local(struct ra_ctx *rctx)
|
|||
* because of the SSA form.
|
||||
*/
|
||||
agx_foreach_ssa_dest(I, d) {
|
||||
unsigned count = agx_write_registers(I, d);
|
||||
unsigned align = agx_size_align_16(I->dest[d].size);
|
||||
|
||||
assign_regs(rctx, I->dest[d],
|
||||
find_regs(used_regs, count, align, rctx->bound));
|
||||
assign_regs(rctx, I->dest[d], pick_regs(rctx, I, d));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -297,9 +378,17 @@ agx_ra(agx_context *ctx)
|
|||
agx_compute_liveness(ctx);
|
||||
uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
|
||||
uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));
|
||||
agx_instr **src_to_collect = calloc(ctx->alloc, sizeof(agx_instr *));
|
||||
BITSET_WORD *visited = calloc(BITSET_WORDS(ctx->alloc), sizeof(BITSET_WORD));
|
||||
|
||||
agx_foreach_instr_global(ctx, I) {
|
||||
/* Record collects so we can coalesce when assigning */
|
||||
if (I->op == AGX_OPCODE_COLLECT) {
|
||||
agx_foreach_ssa_src(I, s) {
|
||||
src_to_collect[I->src[s].value] = I;
|
||||
}
|
||||
}
|
||||
|
||||
agx_foreach_ssa_dest(I, d) {
|
||||
unsigned v = I->dest[d].value;
|
||||
assert(ncomps[v] == 0 && "broken SSA");
|
||||
|
|
@ -315,6 +404,7 @@ agx_ra(agx_context *ctx)
|
|||
.shader = ctx,
|
||||
.block = block,
|
||||
.ssa_to_reg = ssa_to_reg,
|
||||
.src_to_collect = src_to_collect,
|
||||
.ncomps = ncomps,
|
||||
.visited = visited,
|
||||
.bound = AGX_NUM_REGS
|
||||
|
|
@ -436,6 +526,7 @@ agx_ra(agx_context *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
free(src_to_collect);
|
||||
free(ssa_to_reg);
|
||||
free(ncomps);
|
||||
free(visited);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue