aco/spill: Restore registers spilled by call immediately

Reloading right after the call makes for better latency hiding, as long
as we are not otherwise short on registers.

On top of RT function calls:
Totals from 7 (0.01% of 81072) affected shaders:

Instrs: 9084 -> 8980 (-1.14%)
CodeSize: 52564 -> 51976 (-1.12%)
SpillSGPRs: 244 -> 248 (+1.64%); split: -3.28%, +4.92%
SpillVGPRs: 360 -> 367 (+1.94%)
Latency: 138989 -> 135669 (-2.39%); split: -2.49%, +0.10%
InvThroughput: 35120 -> 35301 (+0.52%); split: -0.06%, +0.57%
VClause: 258 -> 241 (-6.59%)
SClause: 116 -> 117 (+0.86%)
Copies: 1290 -> 1311 (+1.63%)
Branches: 131 -> 119 (-9.16%)
VALU: 6125 -> 6143 (+0.29%); split: -0.20%, +0.49%
SALU: 920 -> 913 (-0.76%); split: -0.98%, +0.22%
VMEM: 1026 -> 989 (-3.61%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38281>
This commit is contained in:
Natalie Vock 2025-02-17 18:42:49 +01:00 committed by Marge Bot
parent 6616f25e43
commit 7059068b61

View file

@@ -907,6 +907,8 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
auto& current_spills = ctx.spills_exit[block_idx];
std::vector<Temp> call_spills;
while (idx < block->instructions.size()) {
aco_ptr<Instruction>& instr = block->instructions[idx];
@@ -921,6 +923,22 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
std::map<Temp, std::pair<Temp, uint32_t>> reloads;
if (!call_spills.empty()) {
RegisterDemand demand = instr->register_demand;
while (!call_spills.empty() &&
!(demand - spilled_registers + call_spills.back()).exceeds(ctx.target_pressure)) {
Temp old_tmp = call_spills.back();
call_spills.pop_back();
Temp new_tmp = ctx.program->allocateTmp(old_tmp.regClass());
ctx.renames[block_idx][old_tmp] = new_tmp;
reloads[old_tmp] = std::make_pair(new_tmp, current_spills[old_tmp]);
current_spills.erase(old_tmp);
spilled_registers -= new_tmp;
}
call_spills.clear();
}
/* rename and reload operands */
for (Operand& op : instr->operands) {
if (!op.isTemp())
@@ -1076,6 +1094,9 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
}
uint32_t spill_id = ctx.add_to_spills(to_spill, current_spills);
if (instr->isCall())
call_spills.emplace_back(to_spill);
/* add interferences with reloads */
for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads)
ctx.add_interference(spill_id, pair.second.second);