From 80bc658efcbcbb071ef95fd099e4d5dda3193913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 21 Feb 2024 14:57:36 +0100 Subject: [PATCH] aco/spill: select more loop-carried variables to be spilled This changes the heuristic to also spill live-through variables that are used within the loop when the register pressure is too high. Totals from 68 (0.09% of 79395) affected shaders: (GFX11) Instrs: 3385477 -> 3386659 (+0.03%); split: -0.11%, +0.14% CodeSize: 17329668 -> 17353796 (+0.14%); split: -0.12%, +0.26% SpillSGPRs: 4246 -> 3659 (-13.82%); split: -17.12%, +3.30% SpillVGPRs: 978 -> 884 (-9.61%); split: -10.02%, +0.41% Scratch: 58880 -> 59648 (+1.30%) Latency: 20567445 -> 20753412 (+0.90%); split: -0.08%, +0.98% InvThroughput: 5091128 -> 5188098 (+1.90%); split: -0.07%, +1.97% VClause: 90446 -> 90499 (+0.06%); split: -0.05%, +0.11% SClause: 68270 -> 68072 (-0.29%); split: -0.35%, +0.06% Copies: 280689 -> 278003 (-0.96%); split: -1.08%, +0.12% Branches: 112577 -> 112608 (+0.03%); split: -0.10%, +0.13% VALU: 1863415 -> 1866878 (+0.19%); split: -0.07%, +0.26% SALU: 415572 -> 413062 (-0.60%); split: -0.63%, +0.03% VMEM: 140890 -> 141045 (+0.11%); split: -0.04%, +0.15% VOPD: 650 -> 670 (+3.08%); split: +3.54%, -0.46% Part-of: --- src/amd/compiler/aco_spill.cpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 9bfc702c399..dfcedaf6520 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -519,7 +519,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) const auto& next_use_distances = ctx.next_use_distances_start[block_idx]; /* loop header block */ - if (block->loop_nest_depth > ctx.program->blocks[block_idx - 1].loop_nest_depth) { + if (block->kind & block_kind_loop_header) { assert(block->linear_preds[0] == block_idx - 1); assert(block->logical_preds[0] == block_idx - 1); @@ 
-527,11 +527,8 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) RegisterDemand reg_pressure = get_live_in_demand(ctx, block_idx); RegisterDemand loop_demand = reg_pressure; unsigned i = block_idx; - while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth) { - assert(ctx.program->blocks.size() > i); + while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth) loop_demand.update(ctx.program->blocks[i++].register_demand); - } - unsigned loop_end = i; for (auto spilled : ctx.spills_exit[block_idx - 1]) { auto it = next_use_distances.find(spilled.first); @@ -557,15 +554,17 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) break; unsigned distance = 0; + unsigned remat = 0; Temp to_spill; for (const std::pair>& pair : next_use_distances) { - if (pair.first.type() == type && - (pair.second.first >= loop_end || - (ctx.remat.count(pair.first) && type == RegType::sgpr)) && - pair.second.second > distance && !ctx.spills_entry[block_idx].count(pair.first)) { + unsigned can_remat = ctx.remat.count(pair.first); + if (pair.first.type() == type && !ctx.spills_entry[block_idx].count(pair.first) && + ctx.next_use_distances_end[block_idx - 1].count(pair.first) && + (can_remat > remat || (can_remat == remat && pair.second.second > distance))) { to_spill = pair.first; distance = pair.second.second; + remat = can_remat; } } @@ -577,12 +576,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) continue; } - if (!ctx.spills_exit[block_idx - 1].count(to_spill)) { - ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]); - } else { - ctx.spills_entry[block_idx][to_spill] = ctx.spills_exit[block_idx - 1][to_spill]; - } - + ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]); spilled_registers += to_spill; loop_demand -= to_spill; }