aco/isel: move if_context and loop_context to heap

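if_context and loop_context are large structs and, when kept as local
variables, may cause stack overflows during control-flow (CF) traversal.
This fix moves them to the heap by storing them in the std::vector-backed
if_stack and loop_stack members of isel_context.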

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40364>
This commit is contained in:
Olle Lögdahl 2026-03-31 16:47:12 +02:00 committed by Marge Bot
parent 909e0026d8
commit 5c1dea7ee4
5 changed files with 34 additions and 27 deletions

View file

@ -98,8 +98,6 @@ struct exec_info {
struct cf_context {
struct {
unsigned header_idx = 0;
Block* exit = NULL;
bool has_divergent_continue = false;
bool has_divergent_break = false;
} parent_loop;
@ -127,6 +125,7 @@ struct if_context {
struct loop_context {
Block loop_exit;
unsigned header_idx = 0;
cf_context cf_info_old;
};
@ -147,6 +146,9 @@ struct isel_context {
bool skipping_empty_exec = false;
if_context empty_exec_skip;
std::vector<if_context> if_stack;
std::vector<loop_context> loop_stack;
/* NIR range analysis. */
struct hash_table* range_ht;
struct hash_table* numlsb_ht;

View file

@ -41,7 +41,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
unsigned idx = ctx->block->index;
if (is_break) {
logical_target = ctx->cf_info.parent_loop.exit;
logical_target = &ctx->loop_stack.back().loop_exit;
add_logical_edge(idx, logical_target);
ctx->block->kind |= block_kind_break;
@ -60,7 +60,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
if (!ctx->cf_info.exec.potentially_empty_break)
ctx->cf_info.exec.potentially_empty_break = true;
} else {
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
logical_target = &ctx->program->blocks[ctx->loop_stack.back().header_idx];
add_logical_edge(idx, logical_target);
ctx->block->kind |= block_kind_continue;
@ -92,7 +92,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
add_linear_edge(idx, break_block);
/* the loop_header pointer might be invalidated by this point */
if (!is_break)
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
logical_target = &ctx->program->blocks[ctx->loop_stack.back().header_idx];
add_linear_edge(break_block->index, logical_target);
bld.reset(break_block);
bld.branch(aco_opcode::p_branch);
@ -136,7 +136,8 @@ begin_loop(isel_context* ctx, loop_context* lc)
append_logical_start(ctx->block);
lc->cf_info_old = ctx->cf_info;
ctx->cf_info.parent_loop = {loop_header->index, &lc->loop_exit, false};
lc->header_idx = loop_header->index;
ctx->cf_info.parent_loop = {false, false};
ctx->cf_info.parent_if.is_divergent = false;
/* Never enter a loop with empty exec mask. */
@ -152,7 +153,7 @@ end_loop(isel_context* ctx, loop_context* lc)
* divergent control flow requires WQM.
*/
assert(!ctx->cf_info.exec.potentially_empty_discard);
Block& header = ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
Block& header = ctx->program->blocks[lc->header_idx];
/* Add the trivial continue. */
if (!ctx->cf_info.has_branch) {

View file

@ -941,15 +941,17 @@ void
visit_loop(isel_context* ctx, nir_loop* loop)
{
assert(!nir_loop_has_continue_construct(loop));
loop_context lc;
begin_loop(ctx, &lc);
ctx->loop_stack.push_back(loop_context());
begin_loop(ctx, &ctx->loop_stack.back());
ctx->cf_info.parent_loop.has_divergent_break =
loop->divergent_break && nir_block_num_preds(nir_loop_first_block(loop)) > 1;
ctx->cf_info.in_divergent_cf |= ctx->cf_info.parent_loop.has_divergent_break;
visit_cf_list(ctx, &loop->body);
end_loop(ctx, &lc);
end_loop(ctx, &ctx->loop_stack.back());
ctx->loop_stack.pop_back();
}
void
@ -958,7 +960,7 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa);
Builder bld(ctx->program, ctx->block);
aco_ptr<Instruction> branch;
if_context ic;
ctx->if_stack.push_back(if_context());
if (!nir_src_is_divergent(&if_stmt->condition)) { /* uniform condition */
/**
@ -980,13 +982,13 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
assert(cond.regClass() == ctx->program->lane_mask);
cond = bool_to_scalar_condition(ctx, cond);
begin_uniform_if_then(ctx, &ic, cond);
begin_uniform_if_then(ctx, &ctx->if_stack.back(), cond);
visit_cf_list(ctx, &if_stmt->then_list);
begin_uniform_if_else(ctx, &ic);
begin_uniform_if_else(ctx, &ctx->if_stack.back());
visit_cf_list(ctx, &if_stmt->else_list);
end_uniform_if(ctx, &ic);
end_uniform_if(ctx, &ctx->if_stack.back());
} else { /* non-uniform condition */
/**
* To maintain a logical and linear CFG without critical edges,
@ -1023,14 +1025,16 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
* BB_ENDIF
**/
begin_divergent_if_then(ctx, &ic, cond, if_stmt->control);
begin_divergent_if_then(ctx, &ctx->if_stack.back(), cond, if_stmt->control);
visit_cf_list(ctx, &if_stmt->then_list);
begin_divergent_if_else(ctx, &ic, if_stmt->control);
begin_divergent_if_else(ctx, &ctx->if_stack.back(), if_stmt->control);
visit_cf_list(ctx, &if_stmt->else_list);
end_divergent_if(ctx, &ic);
end_divergent_if(ctx, &ctx->if_stack.back());
}
ctx->if_stack.pop_back();
}
void

View file

@ -3773,8 +3773,8 @@ pops_await_overlapped_waves(isel_context* ctx)
/* Await the overlapped waves. */
loop_context wait_loop_context;
begin_loop(ctx, &wait_loop_context);
ctx->loop_stack.push_back(loop_context());
begin_loop(ctx, &ctx->loop_stack.back());
bld.reset(ctx->block);
const Temp exiting_wave_id = bld.pseudo(aco_opcode::p_pops_gfx9_add_exiting_wave_id, bld.def(s1),
@ -3795,7 +3795,7 @@ pops_await_overlapped_waves(isel_context* ctx)
/* Sleep before rechecking to let overlapped waves run for some time. */
bld.sopp(aco_opcode::s_sleep, ctx->program->gfx_level >= GFX10 ? UINT16_MAX : 3);
end_loop(ctx, &wait_loop_context);
end_loop(ctx, &ctx->loop_stack.back());
bld.reset(ctx->block);
/* Indicate the wait has been done to subsequent compilation stages. */
@ -4648,7 +4648,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
}
case nir_intrinsic_terminate:
case nir_intrinsic_terminate_if: {
assert(ctx->cf_info.parent_loop.exit == NULL && "Terminate must not appear in loops.");
assert(ctx->loop_stack.empty() && "Terminate must not appear in loops.");
Operand cond = Operand::c32(-1u);
if (instr->intrinsic == nir_intrinsic_terminate_if) {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);

View file

@ -143,8 +143,8 @@ dump_vgprs_to_mem(isel_context* ctx, Builder& bld, Operand rsrc)
Operand(PhysReg{1}, s1) /* SRC0 mode */);
}
loop_context lc;
begin_loop(ctx, &lc);
ctx->loop_stack.push_back(loop_context());
begin_loop(ctx, &ctx->loop_stack.back());
{
bld.reset(ctx->block);
@ -177,7 +177,7 @@ dump_vgprs_to_mem(isel_context* ctx, Builder& bld, Operand rsrc)
}
end_uniform_if(ctx, &loop_break);
}
end_loop(ctx, &lc);
end_loop(ctx, &ctx->loop_stack.back());
bld.reset(ctx->block);
if (ctx->program->gfx_level < GFX10) {
@ -240,8 +240,8 @@ dump_lds_to_mem(isel_context* ctx, Builder& bld, Operand rsrc)
Operand m = load_lds_size_m0(bld);
loop_context lc;
begin_loop(ctx, &lc);
ctx->loop_stack.push_back(loop_context());
begin_loop(ctx, &ctx->loop_stack.back());
{
bld.reset(ctx->block);
@ -274,7 +274,7 @@ dump_lds_to_mem(isel_context* ctx, Builder& bld, Operand rsrc)
}
end_uniform_if(ctx, &loop_break);
}
end_loop(ctx, &lc);
end_loop(ctx, &ctx->loop_stack.back());
bld.reset(ctx->block);
}
begin_uniform_if_else(ctx, &ic);