mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
agx: early-kill sources only if it won't shuffle
Rather than always early-killing and then hitting pathological shuffle
situations, only early-kill when we can prove that we won't need to shuffle. It
turns out that's the case most of the time.
Even with this heuristic, shader-db still gets hurt badly due to the extra moves.
But hopefully the numbers here are small enough that we can move on with our
lives and fix this source of known unsoundness.
this is tagged for backport as it's needed to avoid a perf regression with the
previous patch.
combined stats from this commit and the previous commit:
total instrs in shared programs: 2846065 -> 2852257 (0.22%)
instrs in affected programs: 618734 -> 624926 (1.00%)
total alu in shared programs: 2329477 -> 2335534 (0.26%)
alu in affected programs: 508119 -> 514176 (1.19%)
total gprs in shared programs: 894762 -> 901327 (0.73%)
gprs in affected programs: 36946 -> 43511 (17.77%)
Backport-to: 25.1
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34595>
(cherry picked from commit b1e86b3eae)
This commit is contained in:
parent
27b46ecfb8
commit
2099c23dab
2 changed files with 75 additions and 11 deletions
|
|
@ -844,7 +844,7 @@
|
||||||
"description": "agx: early-kill sources only if it won't shuffle",
|
"description": "agx: early-kill sources only if it won't shuffle",
|
||||||
"nominated": true,
|
"nominated": true,
|
||||||
"nomination_type": 4,
|
"nomination_type": 4,
|
||||||
"resolution": 0,
|
"resolution": 1,
|
||||||
"main_sha": null,
|
"main_sha": null,
|
||||||
"because_sha": null,
|
"because_sha": null,
|
||||||
"notes": null
|
"notes": null
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,11 @@ struct ra_ctx {
|
||||||
BITSET_WORD *visited;
|
BITSET_WORD *visited;
|
||||||
BITSET_WORD *used_regs[RA_CLASSES];
|
BITSET_WORD *used_regs[RA_CLASSES];
|
||||||
|
|
||||||
|
/* Were any sources killed early this instruction? We assert this is not true
|
||||||
|
* when shuffling.
|
||||||
|
*/
|
||||||
|
bool early_killed;
|
||||||
|
|
||||||
/* Maintained while assigning registers. Count of registers required, i.e.
|
/* Maintained while assigning registers. Count of registers required, i.e.
|
||||||
* the maximum register assigned + 1.
|
* the maximum register assigned + 1.
|
||||||
*/
|
*/
|
||||||
|
|
@ -604,6 +609,7 @@ find_regs(struct ra_ctx *rctx, agx_instr *I, unsigned dest_idx, unsigned count,
|
||||||
if (find_regs_simple(rctx, cls, count, align, ®)) {
|
if (find_regs_simple(rctx, cls, count, align, ®)) {
|
||||||
return reg;
|
return reg;
|
||||||
} else {
|
} else {
|
||||||
|
assert(!rctx->early_killed && "no live range splits with early kill");
|
||||||
assert(cls == RA_GPR && "no memory live range splits");
|
assert(cls == RA_GPR && "no memory live range splits");
|
||||||
|
|
||||||
struct util_dynarray copies = {0};
|
struct util_dynarray copies = {0};
|
||||||
|
|
@ -998,6 +1004,47 @@ pick_regs(struct ra_ctx *rctx, agx_instr *I, unsigned d)
|
||||||
return find_regs(rctx, I, d, count, align);
|
return find_regs(rctx, I, d, count, align);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
kill_source(struct ra_ctx *rctx, const agx_instr *I, unsigned s)
|
||||||
|
{
|
||||||
|
enum ra_class cls = ra_class_for_index(I->src[s]);
|
||||||
|
unsigned reg = rctx->ssa_to_reg[I->src[s].value];
|
||||||
|
unsigned count = rctx->ncomps[I->src[s].value];
|
||||||
|
|
||||||
|
assert(I->op != AGX_OPCODE_PHI && "phis don't use .kill");
|
||||||
|
assert(count >= 1);
|
||||||
|
|
||||||
|
BITSET_CLEAR_RANGE(rctx->used_regs[cls], reg, reg + count - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
try_kill_early_sources(struct ra_ctx *rctx, const agx_instr *I,
|
||||||
|
unsigned first_source, unsigned last_source,
|
||||||
|
unsigned region_end, unsigned region_base)
|
||||||
|
{
|
||||||
|
unsigned dest_size = util_next_power_of_two(rctx->ncomps[I->dest[0].value]);
|
||||||
|
unsigned dest_end = region_base + dest_size;
|
||||||
|
|
||||||
|
/* We can only early-kill a region if we can trivially allocate the
|
||||||
|
* destination to it. That way we never shuffle killed sources.
|
||||||
|
*
|
||||||
|
* To ensure that, the region must be aligned and cover the destination.
|
||||||
|
*/
|
||||||
|
if (region_base == region_end ||
|
||||||
|
(rctx->ssa_to_reg[I->src[first_source].value] & (dest_size - 1)) ||
|
||||||
|
((region_end < dest_end) &&
|
||||||
|
BITSET_TEST_RANGE(rctx->used_regs[RA_GPR], region_end, dest_end)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (unsigned s = first_source; s <= last_source; ++s) {
|
||||||
|
if (I->src[s].kill && !I->src[s].memory) {
|
||||||
|
kill_source(rctx, I, s);
|
||||||
|
rctx->early_killed = true;
|
||||||
|
I->src[s].kill = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Assign registers to SSA values in a block. */
|
/** Assign registers to SSA values in a block. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -1008,7 +1055,6 @@ agx_ra_assign_local(struct ra_ctx *rctx)
|
||||||
uint16_t *ssa_to_reg = calloc(rctx->shader->alloc, sizeof(uint16_t));
|
uint16_t *ssa_to_reg = calloc(rctx->shader->alloc, sizeof(uint16_t));
|
||||||
|
|
||||||
agx_block *block = rctx->block;
|
agx_block *block = rctx->block;
|
||||||
uint8_t *ncomps = rctx->ncomps;
|
|
||||||
rctx->used_regs[RA_GPR] = used_regs_gpr;
|
rctx->used_regs[RA_GPR] = used_regs_gpr;
|
||||||
rctx->used_regs[RA_MEM] = used_regs_mem;
|
rctx->used_regs[RA_MEM] = used_regs_mem;
|
||||||
rctx->ssa_to_reg = ssa_to_reg;
|
rctx->ssa_to_reg = ssa_to_reg;
|
||||||
|
|
@ -1085,6 +1131,31 @@ agx_ra_assign_local(struct ra_ctx *rctx)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Search for regions of contiguous killed sources to early-kill. */
|
||||||
|
rctx->early_killed = false;
|
||||||
|
|
||||||
|
if (I->nr_dests == 1) {
|
||||||
|
unsigned first_src = 0;
|
||||||
|
unsigned end = 0;
|
||||||
|
unsigned start = 0;
|
||||||
|
|
||||||
|
agx_foreach_ssa_src(I, s) {
|
||||||
|
if (I->src[s].kill && !I->src[s].memory) {
|
||||||
|
unsigned reg = rctx->ssa_to_reg[I->src[s].value];
|
||||||
|
|
||||||
|
if (start == end || end != reg) {
|
||||||
|
try_kill_early_sources(rctx, I, first_src, s, end, start);
|
||||||
|
first_src = s;
|
||||||
|
start = reg;
|
||||||
|
}
|
||||||
|
|
||||||
|
end = reg + rctx->ncomps[I->src[s].value];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try_kill_early_sources(rctx, I, first_src, I->nr_srcs - 1, end, start);
|
||||||
|
}
|
||||||
|
|
||||||
/* Next, assign destinations one at a time. This is always legal
|
/* Next, assign destinations one at a time. This is always legal
|
||||||
* because of the SSA form.
|
* because of the SSA form.
|
||||||
*/
|
*/
|
||||||
|
|
@ -1095,17 +1166,10 @@ agx_ra_assign_local(struct ra_ctx *rctx)
|
||||||
assign_regs(rctx, I->dest[d], pick_regs(rctx, I, d));
|
assign_regs(rctx, I->dest[d], pick_regs(rctx, I, d));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Free killed sources */
|
/* Free late-killed sources */
|
||||||
agx_foreach_ssa_src(I, s) {
|
agx_foreach_ssa_src(I, s) {
|
||||||
if (I->src[s].kill) {
|
if (I->src[s].kill) {
|
||||||
assert(I->op != AGX_OPCODE_PHI && "phis don't use .kill");
|
kill_source(rctx, I, s);
|
||||||
|
|
||||||
enum ra_class cls = ra_class_for_index(I->src[s]);
|
|
||||||
unsigned reg = ssa_to_reg[I->src[s].value];
|
|
||||||
unsigned count = ncomps[I->src[s].value];
|
|
||||||
|
|
||||||
assert(count >= 1);
|
|
||||||
BITSET_CLEAR_RANGE(rctx->used_regs[cls], reg, reg + count - 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue