From c5c02d0571a9e3d8ea978bf536ec710b25754376 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 17 Oct 2023 01:40:25 -0700 Subject: [PATCH 1/2] intel/elk: Move code to enable spilling to choose_spill_reg() The first time we spill a register, we may need to discard and rebuild the interference graph with some registers reserved, as we unfortunately have to use registers to send messages to spill registers. We also need to set spill costs. It makes sense to do both of these tasks in choose_spill_reg(), rather than open coding it in the middle of the spill/retry-allocation loop. We also introduce a new boolean for whether the current interference graph supports spilling, in order to simplify some logic. Cc: mesa-stable Fixes: e99081e76d4 ("intel/fs/ra: Spill without destroying the interference graph") --- .../compiler/elk/elk_fs_reg_allocate.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp index 229a0853cfe..ca31dbc43f8 100644 --- a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp +++ b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp @@ -387,6 +387,7 @@ private: int rsi; ra_graph *g; + bool interference_graph_supports_spilling; bool have_spill_costs; int payload_node_count; @@ -629,6 +630,8 @@ elk_fs_reg_alloc::setup_inst_interference(const elk_fs_inst *inst) void elk_fs_reg_alloc::build_interference_graph(bool allow_spilling) { + interference_graph_supports_spilling = allow_spilling; + /* Compute the RA node layout */ node_count = 0; first_payload_node = node_count; @@ -925,6 +928,15 @@ elk_fs_reg_alloc::set_spill_costs() int elk_fs_reg_alloc::choose_spill_reg() { + /* If we're going to spill but we've never spilled before, we need + * to re-build the interference graph with MRFs enabled to allow + * spilling. 
+ */ + if (!interference_graph_supports_spilling) { + discard_interference_graph(); + build_interference_graph(true); + } + if (!have_spill_costs) set_spill_costs(); @@ -1158,15 +1170,6 @@ elk_fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all) break; } - /* If we're going to spill but we've never spilled before, we need - * to re-build the interference graph with MRFs enabled to allow - * spilling. - */ - if (!fs->spilled_any_registers) { - discard_interference_graph(); - build_interference_graph(true); - } - spill_reg(reg); spilled++; } From a8fafc0f32023c076be9720d6a231ccfb1415a0f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 9 Sep 2023 03:25:35 -0700 Subject: [PATCH 2/2] intel/elk: Re-run register allocation once after recreating the graph Our backend does a somewhat unusual sequence: 1. Set up the interference graph 2. Try to register allocate 3. Fail and realize we have to spill 4. Recreate(!) the interference graph with different node counts, because unfortunately spills and fills may need temporary registers set aside for that purpose, which can no longer be used generally. 5. Ask for the best spill node because we know we must spill On step 4, ra_realloc_interference_graph() reallocs the in_stack bitset for the new nodes. However, it leaves the new bitset words uninitialized, because it's supposed to be set up by ra_select(). On step 5, however, the Intel backend calls ra_get_best_spill_node() _without_ first calling ra_select() (or ra_allocate()). So at that point, the in_stack bitset is not properly initialized, and we'll end up reading uninitialized garbage in ra_get_best_spill_node(), and non-deterministically end up skipping candidates for spilling. While debugging this, I observed ra_get_best_spill_node() seeing non-zero in_stack bits set while g->tmp.stack_count was 0. So no nodes could possibly be in the stack. 
We could simply initialize the memory, but there's a deeper problem: in Chaitin-Briggs allocators, the list of spill candidates is built in the "Select" step. In our implementation, we technically don't make a list of candidates, but rather flag registers that *aren't* candidates. By never running ra_allocate() on our new graph, we never produce that info. So when we ask for a spill node, we consider *all* registers as spill candidates, which is far from ideal. To fix this, we simply call ra_allocate() to rebuild that information on the new graph. It's worth noting that this information may not be quite the same as what we had for our old graph, as we reserved some registers, increasing interference. This escaped our notice for a long time because our allocation loop tries to spill a single register, tries to allocate, and repeats if it fails. Because retrying calls ra_select(), which initializes the spill candidate info, this non-determinism only happened for the first register selected. However, recently the backend gained support for spilling multiple registers in each loop step, which highlighted this problem, as different per-step spill sizes produced different results due to this non-determinism. 
Cc: mesa-stable Fixes: e99081e76d4 ("intel/fs/ra: Spill without destroying the interference graph") --- src/intel/compiler/elk/elk_fs_reg_allocate.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp index ca31dbc43f8..17bfdaf4792 100644 --- a/src/intel/compiler/elk/elk_fs_reg_allocate.cpp +++ b/src/intel/compiler/elk/elk_fs_reg_allocate.cpp @@ -935,6 +935,19 @@ elk_fs_reg_alloc::choose_spill_reg() if (!interference_graph_supports_spilling) { discard_interference_graph(); build_interference_graph(true); + + /* ra_get_best_spill_node() relies on ra_allocate() having been called + * once to set up the stack of trivially colorable and optimistically + * colored nodes. By torching and rebuilding our interference graph, + * we also discarded the information needed to pick spill candidates. + * + * The simplest (if expensive) solution is to call ra_allocate() again + * on the new graph. This can't succeed - allocation already failed on + * our old graph which had fewer constraints - but it creates the list + * of spill candidates for our new more constrained graph. + */ + ASSERTED bool allocated = ra_allocate(g); + assert(!allocated); } if (!have_spill_costs)