From 5c1dea7ee492337c2de8dbb06f56f281d2388773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olle=20L=C3=B6gdahl?= Date: Tue, 31 Mar 2026 16:47:12 +0200 Subject: [PATCH] aco/isel: move if_context and loop_context to heap if_context and loop_context are large structs and may cause stack overflows during CF traversal. This fix moves them to the heap. Part-of: --- .../aco_instruction_selection.h | 6 +++-- .../instruction_selection/aco_isel_cfg.cpp | 11 +++++---- .../instruction_selection/aco_select_nir.cpp | 24 +++++++++++-------- .../aco_select_nir_intrinsics.cpp | 8 +++---- .../aco_select_trap_handler.cpp | 12 +++++----- 5 files changed, 34 insertions(+), 27 deletions(-) diff --git a/src/amd/compiler/instruction_selection/aco_instruction_selection.h b/src/amd/compiler/instruction_selection/aco_instruction_selection.h index 18a90fd33a2..96e1bc310cd 100644 --- a/src/amd/compiler/instruction_selection/aco_instruction_selection.h +++ b/src/amd/compiler/instruction_selection/aco_instruction_selection.h @@ -98,8 +98,6 @@ struct exec_info { struct cf_context { struct { - unsigned header_idx = 0; - Block* exit = NULL; bool has_divergent_continue = false; bool has_divergent_break = false; } parent_loop; @@ -127,6 +125,7 @@ struct if_context { struct loop_context { Block loop_exit; + unsigned header_idx = 0; cf_context cf_info_old; }; @@ -147,6 +146,9 @@ struct isel_context { bool skipping_empty_exec = false; if_context empty_exec_skip; + std::vector<if_context> if_stack; + std::vector<loop_context> loop_stack; + /* NIR range analysis. 
*/ struct hash_table* range_ht; struct hash_table* numlsb_ht; diff --git a/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp b/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp index 6da46a38317..ba816d33f0d 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp @@ -41,7 +41,7 @@ emit_loop_jump(isel_context* ctx, bool is_break) unsigned idx = ctx->block->index; if (is_break) { - logical_target = ctx->cf_info.parent_loop.exit; + logical_target = &ctx->loop_stack.back().loop_exit; add_logical_edge(idx, logical_target); ctx->block->kind |= block_kind_break; @@ -60,7 +60,7 @@ emit_loop_jump(isel_context* ctx, bool is_break) if (!ctx->cf_info.exec.potentially_empty_break) ctx->cf_info.exec.potentially_empty_break = true; } else { - logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; + logical_target = &ctx->program->blocks[ctx->loop_stack.back().header_idx]; add_logical_edge(idx, logical_target); ctx->block->kind |= block_kind_continue; @@ -92,7 +92,7 @@ emit_loop_jump(isel_context* ctx, bool is_break) add_linear_edge(idx, break_block); /* the loop_header pointer might be invalidated by this point */ if (!is_break) - logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; + logical_target = &ctx->program->blocks[ctx->loop_stack.back().header_idx]; add_linear_edge(break_block->index, logical_target); bld.reset(break_block); bld.branch(aco_opcode::p_branch); @@ -136,7 +136,8 @@ begin_loop(isel_context* ctx, loop_context* lc) append_logical_start(ctx->block); lc->cf_info_old = ctx->cf_info; - ctx->cf_info.parent_loop = {loop_header->index, &lc->loop_exit, false}; + lc->header_idx = loop_header->index; + ctx->cf_info.parent_loop = {false, false}; ctx->cf_info.parent_if.is_divergent = false; /* Never enter a loop with empty exec mask. */ @@ -152,7 +153,7 @@ end_loop(isel_context* ctx, loop_context* lc) * divergent control flow requires WQM. 
*/ assert(!ctx->cf_info.exec.potentially_empty_discard); - Block& header = ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; + Block& header = ctx->program->blocks[lc->header_idx]; /* Add the trivial continue. */ if (!ctx->cf_info.has_branch) { diff --git a/src/amd/compiler/instruction_selection/aco_select_nir.cpp b/src/amd/compiler/instruction_selection/aco_select_nir.cpp index b4589fc51e3..637e20482ec 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir.cpp @@ -941,15 +941,17 @@ void visit_loop(isel_context* ctx, nir_loop* loop) { assert(!nir_loop_has_continue_construct(loop)); - loop_context lc; - begin_loop(ctx, &lc); + + ctx->loop_stack.push_back(loop_context()); + begin_loop(ctx, &ctx->loop_stack.back()); ctx->cf_info.parent_loop.has_divergent_break = loop->divergent_break && nir_block_num_preds(nir_loop_first_block(loop)) > 1; ctx->cf_info.in_divergent_cf |= ctx->cf_info.parent_loop.has_divergent_break; visit_cf_list(ctx, &loop->body); - end_loop(ctx, &lc); + end_loop(ctx, &ctx->loop_stack.back()); + ctx->loop_stack.pop_back(); } void @@ -958,7 +960,7 @@ visit_if(isel_context* ctx, nir_if* if_stmt) Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa); Builder bld(ctx->program, ctx->block); aco_ptr<Instruction> branch; - if_context ic; + ctx->if_stack.push_back(if_context()); if (!nir_src_is_divergent(&if_stmt->condition)) { /* uniform condition */ /** @@ -980,13 +982,13 @@ visit_if(isel_context* ctx, nir_if* if_stmt) assert(cond.regClass() == ctx->program->lane_mask); cond = bool_to_scalar_condition(ctx, cond); - begin_uniform_if_then(ctx, &ic, cond); + begin_uniform_if_then(ctx, &ctx->if_stack.back(), cond); visit_cf_list(ctx, &if_stmt->then_list); - begin_uniform_if_else(ctx, &ic); + begin_uniform_if_else(ctx, &ctx->if_stack.back()); visit_cf_list(ctx, &if_stmt->else_list); - end_uniform_if(ctx, &ic); + end_uniform_if(ctx, &ctx->if_stack.back()); } else { /* non-uniform condition */ 
/** * To maintain a logical and linear CFG without critical edges, @@ -1023,14 +1025,16 @@ visit_if(isel_context* ctx, nir_if* if_stmt) * BB_ENDIF **/ - begin_divergent_if_then(ctx, &ic, cond, if_stmt->control); + begin_divergent_if_then(ctx, &ctx->if_stack.back(), cond, if_stmt->control); visit_cf_list(ctx, &if_stmt->then_list); - begin_divergent_if_else(ctx, &ic, if_stmt->control); + begin_divergent_if_else(ctx, &ctx->if_stack.back(), if_stmt->control); visit_cf_list(ctx, &if_stmt->else_list); - end_divergent_if(ctx, &ic); + end_divergent_if(ctx, &ctx->if_stack.back()); } + + ctx->if_stack.pop_back(); } void diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index 80aee79c4d3..49840821d46 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -3773,8 +3773,8 @@ pops_await_overlapped_waves(isel_context* ctx) /* Await the overlapped waves. */ - loop_context wait_loop_context; - begin_loop(ctx, &wait_loop_context); + ctx->loop_stack.push_back(loop_context()); + begin_loop(ctx, &ctx->loop_stack.back()); bld.reset(ctx->block); const Temp exiting_wave_id = bld.pseudo(aco_opcode::p_pops_gfx9_add_exiting_wave_id, bld.def(s1), @@ -3795,7 +3795,7 @@ pops_await_overlapped_waves(isel_context* ctx) /* Sleep before rechecking to let overlapped waves run for some time. */ bld.sopp(aco_opcode::s_sleep, ctx->program->gfx_level >= GFX10 ? UINT16_MAX : 3); - end_loop(ctx, &wait_loop_context); + end_loop(ctx, &ctx->loop_stack.back()); bld.reset(ctx->block); /* Indicate the wait has been done to subsequent compilation stages. 
*/ @@ -4648,7 +4648,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_terminate: case nir_intrinsic_terminate_if: { - assert(ctx->cf_info.parent_loop.exit == NULL && "Terminate must not appear in loops."); + assert(ctx->loop_stack.empty() && "Terminate must not appear in loops."); Operand cond = Operand::c32(-1u); if (instr->intrinsic == nir_intrinsic_terminate_if) { Temp src = get_ssa_temp(ctx, instr->src[0].ssa); diff --git a/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp b/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp index 2940652f093..d93dca99d5c 100644 --- a/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp @@ -143,8 +143,8 @@ dump_vgprs_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) Operand(PhysReg{1}, s1) /* SRC0 mode */); } - loop_context lc; - begin_loop(ctx, &lc); + ctx->loop_stack.push_back(loop_context()); + begin_loop(ctx, &ctx->loop_stack.back()); { bld.reset(ctx->block); @@ -177,7 +177,7 @@ dump_vgprs_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) } end_uniform_if(ctx, &loop_break); } - end_loop(ctx, &lc); + end_loop(ctx, &ctx->loop_stack.back()); bld.reset(ctx->block); if (ctx->program->gfx_level < GFX10) { @@ -240,8 +240,8 @@ dump_lds_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) Operand m = load_lds_size_m0(bld); - loop_context lc; - begin_loop(ctx, &lc); + ctx->loop_stack.push_back(loop_context()); + begin_loop(ctx, &ctx->loop_stack.back()); { bld.reset(ctx->block); @@ -274,7 +274,7 @@ dump_lds_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) } end_uniform_if(ctx, &loop_break); } - end_loop(ctx, &lc); + end_loop(ctx, &ctx->loop_stack.back()); bld.reset(ctx->block); } begin_uniform_if_else(ctx, &ic);