aco/spill: select more loop-carried variables to be spilled

This changes the heuristic to also spill live-through variables that are
used within the loop when the register pressure is too high.

Totals from 68 (0.09% of 79395) affected shaders: (GFX11)

Instrs: 3385477 -> 3386659 (+0.03%); split: -0.11%, +0.14%
CodeSize: 17329668 -> 17353796 (+0.14%); split: -0.12%, +0.26%
SpillSGPRs: 4246 -> 3659 (-13.82%); split: -17.12%, +3.30%
SpillVGPRs: 978 -> 884 (-9.61%); split: -10.02%, +0.41%
Scratch: 58880 -> 59648 (+1.30%)
Latency: 20567445 -> 20753412 (+0.90%); split: -0.08%, +0.98%
InvThroughput: 5091128 -> 5188098 (+1.90%); split: -0.07%, +1.97%
VClause: 90446 -> 90499 (+0.06%); split: -0.05%, +0.11%
SClause: 68270 -> 68072 (-0.29%); split: -0.35%, +0.06%
Copies: 280689 -> 278003 (-0.96%); split: -1.08%, +0.12%
Branches: 112577 -> 112608 (+0.03%); split: -0.10%, +0.13%
VALU: 1863415 -> 1866878 (+0.19%); split: -0.07%, +0.26%
SALU: 415572 -> 413062 (-0.60%); split: -0.63%, +0.03%
VMEM: 140890 -> 141045 (+0.11%); split: -0.04%, +0.15%
VOPD: 650 -> 670 (+3.08%); split: +3.54%, -0.46%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27774>
This commit is contained in:
Daniel Schürmann 2024-02-21 14:57:36 +01:00 committed by Marge Bot
parent 31f6aebb2c
commit 80bc658efc

View file

@@ -519,7 +519,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
const auto& next_use_distances = ctx.next_use_distances_start[block_idx];
/* loop header block */
if (block->loop_nest_depth > ctx.program->blocks[block_idx - 1].loop_nest_depth) {
if (block->kind & block_kind_loop_header) {
assert(block->linear_preds[0] == block_idx - 1);
assert(block->logical_preds[0] == block_idx - 1);
@@ -527,11 +527,8 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
RegisterDemand reg_pressure = get_live_in_demand(ctx, block_idx);
RegisterDemand loop_demand = reg_pressure;
unsigned i = block_idx;
while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth) {
assert(ctx.program->blocks.size() > i);
while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth)
loop_demand.update(ctx.program->blocks[i++].register_demand);
}
unsigned loop_end = i;
for (auto spilled : ctx.spills_exit[block_idx - 1]) {
auto it = next_use_distances.find(spilled.first);
@@ -557,15 +554,17 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
break;
unsigned distance = 0;
unsigned remat = 0;
Temp to_spill;
for (const std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair :
next_use_distances) {
if (pair.first.type() == type &&
(pair.second.first >= loop_end ||
(ctx.remat.count(pair.first) && type == RegType::sgpr)) &&
pair.second.second > distance && !ctx.spills_entry[block_idx].count(pair.first)) {
unsigned can_remat = ctx.remat.count(pair.first);
if (pair.first.type() == type && !ctx.spills_entry[block_idx].count(pair.first) &&
ctx.next_use_distances_end[block_idx - 1].count(pair.first) &&
(can_remat > remat || (can_remat == remat && pair.second.second > distance))) {
to_spill = pair.first;
distance = pair.second.second;
remat = can_remat;
}
}
@@ -577,12 +576,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
continue;
}
if (!ctx.spills_exit[block_idx - 1].count(to_spill)) {
ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
} else {
ctx.spills_entry[block_idx][to_spill] = ctx.spills_exit[block_idx - 1][to_spill];
}
ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
spilled_registers += to_spill;
loop_demand -= to_spill;
}