From 8d9e6715be7fd9a41352169eeb8eaa4faada5c88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 9 Feb 2026 12:44:02 +0100 Subject: [PATCH 01/15] nir/lower_continue_constructs: Simplify loops before lowering continue constructs The idea is inspired by LLVM's LoopSimplify pass. Before lowering continue constructs, the pass now also lowers all continue statements, leaving only the trivial continue. This ensures that loops will always only have one back-edge. Totals from 396 (0.47% of 84383) affected shaders: (Navi48) Instrs: 900330 -> 899850 (-0.05%); split: -0.17%, +0.12% CodeSize: 4727216 -> 4727508 (+0.01%); split: -0.13%, +0.13% Latency: 7276816 -> 7097199 (-2.47%); split: -2.53%, +0.06% InvThroughput: 1580718 -> 1558646 (-1.40%); split: -1.42%, +0.03% VClause: 12872 -> 12879 (+0.05%); split: -0.01%, +0.06% SClause: 22237 -> 22240 (+0.01%); split: -0.00%, +0.02% Copies: 67359 -> 65723 (-2.43%); split: -2.56%, +0.14% Branches: 24252 -> 24163 (-0.37%); split: -0.52%, +0.15% PreSGPRs: 34371 -> 34399 (+0.08%) PreVGPRs: 25268 -> 25280 (+0.05%); split: -0.00%, +0.05% VALU: 512493 -> 511580 (-0.18%); split: -0.33%, +0.15% SALU: 122767 -> 122993 (+0.18%); split: -0.13%, +0.32% VMEM: 22181 -> 22213 (+0.14%) SMEM: 41370 -> 41376 (+0.01%) --- .../nir/nir_lower_continue_constructs.c | 206 +++++++++++++++++- 1 file changed, 204 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_lower_continue_constructs.c b/src/compiler/nir/nir_lower_continue_constructs.c index ce0a74ea1ce..b80973309a6 100644 --- a/src/compiler/nir/nir_lower_continue_constructs.c +++ b/src/compiler/nir/nir_lower_continue_constructs.c @@ -26,12 +26,207 @@ #include "nir_builder.h" #include "nir_control_flow.h" +/* NIR pass to lower loop continue constructs. 
+ * + * NIR loops are maintained in canonical form with these properties: + * - a pre-header: the only predecessor of the loop header + * - a dedicated exit node: dominated by the loop-header + * - a single back-edge to the loop header: the trivial continue + * + * If the loop has a continue construct, the trivial continue is the + * back-edge from the last block of the continue construct to the loop + * header. Otherwise, it is the back-edge from the last block of the + * loop body to the loop header. + * + * In order to lower the continue construct of a loop, all continue + * statements are first removed by either + * - moving the following code to the other side of a branch or + * - guarding following code by inserted IF-statements + * + * Afterwards, the continue construct is inlined before the trivial + * back-edge. + * + */ + +struct loop_simplify_state { + nir_builder *b; + nir_def *continue_flag; + struct exec_list *cf_list; +}; + +static bool +block_ends_in_continue(nir_block *block) +{ + if (nir_block_ends_in_jump(block)) { + nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block)); + return jump->type == nir_jump_continue; + } + + return false; +} + +static bool +lower_continues_in_cf_list(struct exec_list *cf_list, + struct loop_simplify_state *state); + +static bool +lower_continue(nir_block *block, struct loop_simplify_state *state) +{ + if (!block_ends_in_continue(block)) + return false; + + assert(nir_cf_node_is_last(&block->cf_node)); + + /* Remove the continue instruction and set the continue flag to 'true'.
*/ + state->b->cursor = nir_instr_remove(nir_block_last_instr(block)); + nir_store_reg(state->b, nir_imm_true(state->b), state->continue_flag); + + return true; +} + +static bool +lower_continues_in_if(nir_if *nif, struct loop_simplify_state *state) +{ + nir_block *then_block = nir_if_last_then_block(nif); + nir_block *else_block = nir_if_last_else_block(nif); + bool then_jumps = nir_block_ends_in_jump(then_block); + bool else_jumps = nir_block_ends_in_jump(else_block); + + bool progress = false; + progress |= lower_continue(then_block, state); + progress |= lower_continue(else_block, state); + + nir_block *next_block = nir_cf_node_cf_tree_next(&nif->cf_node); + bool is_empty_block = nir_cf_node_is_last(&next_block->cf_node) && + exec_list_is_empty(&next_block->instr_list); + + /* If a branch leg ends in a jump, we first lower any continue statements in it, + * so that we know if we have to move the following blocks to the other side. + */ + if (then_jumps) + progress |= lower_continues_in_cf_list(&nif->then_list, state); + if (else_jumps) + progress |= lower_continues_in_cf_list(&nif->else_list, state); + + if (!is_empty_block && progress) { + /* If at least one side has a continue statement, move the following code + * to the other side. This is necessary to maintain SSA dominance. + */ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node_and_phis(&nif->cf_node), + nir_after_cf_list(state->cf_list)); + + if (then_jumps && else_jumps) { + /* Both branches jump, just delete instructions following the IF. */ + nir_cf_delete(&list); + } else if (then_jumps) { + nir_cf_reinsert(&list, nir_after_cf_list(&nif->else_list)); + } else { + nir_cf_reinsert(&list, nir_after_cf_list(&nif->then_list)); + } + + /* The successor is now empty. No need to predicate following blocks. */ + is_empty_block = true; + } + + /* Recursively lower any continue statements in both branch legs.
*/ + if (!then_jumps) + progress |= lower_continues_in_cf_list(&nif->then_list, state); + if (!else_jumps) + progress |= lower_continues_in_cf_list(&nif->else_list, state); + + if (!is_empty_block && progress) { + /* Predicate following blocks. */ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node_and_phis(&nif->cf_node), + nir_after_cf_list(state->cf_list)); + + state->b->cursor = nir_after_cf_node_and_phis(&nif->cf_node); + nir_if *if_stmt = nir_push_if(state->b, nir_load_reg(state->b, state->continue_flag)); + + assert(!exec_list_is_empty(&list.list)); + nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list)); + nir_pop_if(state->b, NULL); + } + + return progress; +} + +static bool +lower_continues_in_cf_list(struct exec_list *cf_list, + struct loop_simplify_state *state) +{ + bool progress = false; + + struct exec_list *parent_list = state->cf_list; + state->cf_list = cf_list; + + /* We iterate over the list backwards because any given lower call may + * take everything following the given CF node and predicate it. In + * order to avoid recursion/iteration problems, we want everything after + * a given node to already be lowered before this happens. + */ + foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_if: + if (lower_continues_in_if(nir_cf_node_as_if(node), state)) + progress = true; + break; + + case nir_cf_node_block: + case nir_cf_node_loop: + break; + + default: + UNREACHABLE("Invalid inner CF node type"); + } + } + + state->cf_list = parent_list; + + return progress; +} + +static void +simplify_loop(nir_loop *loop) +{ + nir_block *cont = nir_loop_first_continue_block(loop); + nir_block *last = nir_loop_last_block(loop); + + /* Remove trivial continue statement. */ + if (block_ends_in_continue(last)) + nir_instr_remove_v(nir_block_last_instr(last)); + + /* If the loop has only the trivial continue, there is nothing to do. 
*/ + if (!nir_block_ends_in_jump(last) && cont->predecessors.entries == 1) + return; + + struct loop_simplify_state state; + nir_builder b = nir_builder_at(nir_before_block_after_phis(nir_loop_first_block(loop))); + state.b = &b; + + /* Initialize the variable to False. */ + state.continue_flag = nir_decl_reg(&b, 1, 1, 0); + nir_store_reg(&b, nir_imm_false(&b), state.continue_flag); + + lower_continues_in_cf_list(&loop->body, &state); + + return; +} + static bool lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa) { if (!nir_loop_has_continue_construct(loop)) return false; + /* Lower loop header and continue-phis to regs as we are going to move the predecessors. */ + nir_lower_phis_to_regs_block(nir_loop_first_block(loop), true); + nir_lower_phis_to_regs_block(nir_loop_first_continue_block(loop), true); + + /* Simplify the loop in order to ensure that it has at most one back-edge. */ + simplify_loop(loop); + nir_block *header = nir_loop_first_block(loop); nir_block *cont = nir_loop_first_continue_block(loop); @@ -49,8 +244,6 @@ lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa) break; } - nir_lower_phis_to_regs_block(header, false); - if (num_continue == 0) { /* this loop doesn't continue at all. delete the continue construct */ nir_cf_list extracted; @@ -118,8 +311,17 @@ visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa) } case nir_cf_node_loop: { nir_loop *loop = nir_cf_node_as_loop(node); + /* By first lowering inner loops, we ensure that we don't encounter + * any continue statements which don't belong to the current loop. + */ progress |= visit_cf_list(b, &loop->body, repair_ssa); + + /* If we lower continue constructs after inlining functions, they + * might contain nested loops. + */ progress |= visit_cf_list(b, &loop->continue_list, repair_ssa); + + /* Lower continue construct. 
*/ progress |= lower_loop_continue_block(b, loop, repair_ssa); break; } From 36975c60c271dd16de723e56f765546fd5938ca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 9 Feb 2026 13:10:26 +0100 Subject: [PATCH 02/15] nir/lower_continue_constructs: Remove unnecessary handling of multiple continue statements --- .../nir/nir_lower_continue_constructs.c | 88 +++---------------- 1 file changed, 14 insertions(+), 74 deletions(-) diff --git a/src/compiler/nir/nir_lower_continue_constructs.c b/src/compiler/nir/nir_lower_continue_constructs.c index b80973309a6..da795db28f3 100644 --- a/src/compiler/nir/nir_lower_continue_constructs.c +++ b/src/compiler/nir/nir_lower_continue_constructs.c @@ -215,7 +215,7 @@ simplify_loop(nir_loop *loop) } static bool -lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa) +lower_loop_continue_block(nir_builder *b, nir_loop *loop) { if (!nir_loop_has_continue_construct(loop)) return false; @@ -227,67 +227,15 @@ lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa) /* Simplify the loop in order to ensure that it has at most one back-edge. */ simplify_loop(loop); - nir_block *header = nir_loop_first_block(loop); - nir_block *cont = nir_loop_first_continue_block(loop); + nir_cf_list extracted; + nir_cf_list_extract(&extracted, &loop->continue_list); - /* count continue statements excluding unreachable ones */ - unsigned num_continue = 0; - nir_block *single_predecessor = NULL; - set_foreach(&cont->predecessors, entry) { - nir_block *pred = (nir_block *)entry->key; - /* If the continue block has no predecessors, it is unreachable. */ - if (pred->predecessors.entries == 0) - continue; - - single_predecessor = pred; - if (num_continue++) - break; - } - - if (num_continue == 0) { - /* this loop doesn't continue at all. 
delete the continue construct */ - nir_cf_list extracted; - nir_cf_list_extract(&extracted, &loop->continue_list); + if (nir_loop_first_continue_block(loop)->predecessors.entries == 0) { + /* This loop doesn't continue at all. Delete the continue construct. */ nir_cf_delete(&extracted); - } else if (num_continue == 1) { - /* inline the continue construct */ - assert(single_predecessor->successors[0] == cont); - assert(single_predecessor->successors[1] == NULL); - - nir_cf_list extracted; - nir_cf_list_extract(&extracted, &loop->continue_list); - nir_cf_reinsert(&extracted, - nir_after_block_before_jump(single_predecessor)); } else { - nir_lower_phis_to_regs_block(cont, false); - *repair_ssa = true; - - /* As control flow has to re-converge before executing the continue - * construct, we insert it at the beginning of the loop with a flag - * to ensure that it doesn't get executed in the first iteration: - * - * loop { - * if (i != 0) { - * continue construct - * } - * loop body - * } - */ - - nir_variable *do_cont = - nir_local_variable_create(b->impl, glsl_bool_type(), "cont"); - - b->cursor = nir_before_cf_node(&loop->cf_node); - nir_store_var(b, do_cont, nir_imm_false(b), 1); - b->cursor = nir_before_block(header); - nir_if *cont_if = nir_push_if(b, nir_load_var(b, do_cont)); - { - nir_cf_list extracted; - nir_cf_list_extract(&extracted, &loop->continue_list); - nir_cf_reinsert(&extracted, nir_before_cf_list(&cont_if->then_list)); - } - nir_pop_if(b, cont_if); - nir_store_var(b, do_cont, nir_imm_true(b), 1); + /* Inline the continue construct before the trivial continue. 
*/ + nir_cf_reinsert(&extracted, nir_after_cf_list(&loop->body)); } nir_loop_remove_continue_construct(loop); @@ -295,7 +243,7 @@ lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa) } static bool -visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa) +visit_cf_list(nir_builder *b, struct exec_list *list) { bool progress = false; @@ -305,8 +253,8 @@ visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa) continue; case nir_cf_node_if: { nir_if *nif = nir_cf_node_as_if(node); - progress |= visit_cf_list(b, &nif->then_list, repair_ssa); - progress |= visit_cf_list(b, &nif->else_list, repair_ssa); + progress |= visit_cf_list(b, &nif->then_list); + progress |= visit_cf_list(b, &nif->else_list); break; } case nir_cf_node_loop: { @@ -314,15 +262,15 @@ visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa) /* By first lowering inner loops, we ensure that we don't encounter * any continue statements which don't belong to the current loop. */ - progress |= visit_cf_list(b, &loop->body, repair_ssa); + progress |= visit_cf_list(b, &loop->body); /* If we lower continue constructs after inlining functions, they * might contain nested loops. */ - progress |= visit_cf_list(b, &loop->continue_list, repair_ssa); + progress |= visit_cf_list(b, &loop->continue_list); /* Lower continue construct. 
*/ - progress |= lower_loop_continue_block(b, loop, repair_ssa); + progress |= lower_loop_continue_block(b, loop); break; } case nir_cf_node_function: @@ -337,21 +285,13 @@ static bool lower_continue_constructs_impl(nir_function_impl *impl) { nir_builder b = nir_builder_create(impl); - bool repair_ssa = false; - bool progress = visit_cf_list(&b, &impl->body, &repair_ssa); + bool progress = visit_cf_list(&b, &impl->body); if (progress) { nir_progress(true, impl, nir_metadata_none); /* Merge the Phis from Header and Continue Target */ nir_lower_reg_intrinsics_to_ssa_impl(impl); - - /* Re-inserting the Continue Target at the beginning of the loop - * violates the dominance property if instructions in the continue - * use SSA defs from the loop body. - */ - if (repair_ssa) - nir_repair_ssa_impl(impl); } else { nir_no_progress(impl); } From 9f38dbad7ea6efe2de85a5fe530d821e1d7b4e1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 9 Feb 2026 17:50:11 +0100 Subject: [PATCH 03/15] radv/rt: add and lower loop continue construct in traversal shaders We are going to disallow continue statements without loop continue constructs. 
Totals from 58 (0.07% of 84369) affected shaders: (Navi21) Instrs: 2320937 -> 2318938 (-0.09%) CodeSize: 12554896 -> 12546892 (-0.06%); split: -0.06%, +0.00% SpillSGPRs: 116 -> 102 (-12.07%) SpillVGPRs: 2648 -> 2627 (-0.79%) Latency: 13440088 -> 13424694 (-0.11%) InvThroughput: 3262579 -> 3259088 (-0.11%) VClause: 69628 -> 69666 (+0.05%) SClause: 51528 -> 51519 (-0.02%) Copies: 203958 -> 203478 (-0.24%); split: -0.29%, +0.05% Branches: 70777 -> 70400 (-0.53%) PreSGPRs: 4768 -> 4660 (-2.27%) VALU: 1537105 -> 1536899 (-0.01%); split: -0.02%, +0.01% SALU: 340938 -> 339623 (-0.39%) VMEM: 120339 -> 120286 (-0.04%) --- src/amd/vulkan/nir/radv_nir_lower_ray_queries.c | 1 + src/amd/vulkan/nir/radv_nir_rt_common.c | 10 ++++++++-- src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index 5fe0b29d074..3877d2328f3 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -700,6 +700,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device ralloc_free(query_ht); if (progress) { + NIR_PASS(_, shader, nir_lower_continue_constructs); NIR_PASS(_, shader, nir_split_struct_vars, nir_var_shader_temp); NIR_PASS(_, shader, nir_lower_global_vars_to_local); NIR_PASS(_, shader, nir_lower_vars_to_ssa); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index 8300aa57a46..ae57836635f 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -857,8 +857,11 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags); nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + if (!args->use_bvh_stack_rtn) + 
nir_loop_add_continue_construct(loop); + /* When exiting instances via stack, current_node won't ever be invalid with ds_bvh_stack_rtn */ if (args->use_bvh_stack_rtn) { /* Early-exit when the stack is empty and there are no more nodes to process. */ @@ -1154,8 +1157,11 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + if (!args->use_bvh_stack_rtn) + nir_loop_add_continue_construct(loop); + /* When exiting instances via stack, current_node won't ever be invalid with ds_bvh_stack_rtn */ if (args->use_bvh_stack_rtn) { /* Early-exit when the stack is empty and there are no more nodes to process. */ diff --git a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c index c7fcb4d391c..2eff729f89d 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c @@ -1276,6 +1276,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin radv_build_end_trace_token(b, &data, nir_load_var(b, iteration_instance_count)); nir_progress(true, b->impl, nir_metadata_none); + nir_lower_continue_constructs(b->shader); radv_nir_lower_hit_attrib_derefs(b->shader); return data.trav_vars.result; From 97549561011336267e111d4e85a9f88572b64138 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 9 Feb 2026 18:22:09 +0100 Subject: [PATCH 04/15] radv/dgc: add and lower loop continue construct We are going to disallow continue statements without loop continue constructs. 
--- src/amd/vulkan/radv_dgc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index 8802dc631f9..9eaeccc62df 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -1754,8 +1754,10 @@ dgc_alloc_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *se nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "idx"); nir_store_var(b, idx, nir_imm_int(b, 0), 0x1); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + nir_def *cur_idx = nir_load_var(b, idx); nir_break_if(b, nir_ieq(b, cur_idx, load_param8(b, push_constant_size))); @@ -1777,7 +1779,7 @@ dgc_alloc_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *se nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); /* Store push constants set by DGC tokens. */ u_foreach_bit64 (i, layout->push_constant_mask) { @@ -2025,8 +2027,10 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr) nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx"); nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + nir_def *cur_idx = nir_load_var(b, vbo_idx); nir_break_if(b, nir_uge_imm(b, cur_idx, 32 /* bits in vb_desc_usage_mask */)); @@ -2097,7 +2101,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr) nir_store_var(b, vbo_idx, nir_iadd_imm(b, cur_idx, 1), 0x1); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); } /** @@ -2959,6 +2963,8 @@ build_dgc_prepare_shader(struct radv_device *dev, struct radv_indirect_command_l } nir_pop_if(&b, NULL); + nir_lower_continue_constructs(b.shader); + return b.shader; } From e9dee984afc66904d21303e3d133ad9419a8076a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Feb 2026 11:14:31 +0100 Subject: [PATCH 05/15] tgsi_to_nir: Add and lower loop continue constructs We are going to disallow continue statements without loop continue constructs. --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index df0eddd87d2..edc771ee8f2 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1995,7 +1995,7 @@ ttn_emit_instruction(struct ttn_compile *c) break; case TGSI_OPCODE_BGNLOOP: - nir_push_loop(&c->build); + nir_loop_add_continue_construct(nir_push_loop(&c->build)); break; case TGSI_OPCODE_BRK: @@ -2537,6 +2537,7 @@ ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen) MESA_TRACE_FUNC(); + NIR_PASS(_, nir, nir_lower_continue_constructs); NIR_PASS(_, nir, nir_lower_returns); NIR_PASS(_, nir, nir_lower_vars_to_ssa); NIR_PASS(_, nir, nir_lower_reg_intrinsics_to_ssa); From 6e804ede87a732cd8f6ada0343cc28a6e6e5c18e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Feb 2026 11:19:33 +0100 Subject: [PATCH 06/15] dxil/nir: Remove nir_jump_continue from lower_subgroup_scan() We are going to disallow continue statements without loop continue constructs. 
--- src/microsoft/compiler/dxil_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/microsoft/compiler/dxil_nir.c b/src/microsoft/compiler/dxil_nir.c index c81aa090bea..1f6b3cc1e4e 100644 --- a/src/microsoft/compiler/dxil_nir.c +++ b/src/microsoft/compiler/dxil_nir.c @@ -2113,10 +2113,10 @@ lower_subgroup_scan(nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_pop_if(b, if_active_thread); nir_store_var(b, loop_counter_var, nir_iadd_imm(b, loop_counter, 1), 1); - nir_jump(b, nir_jump_continue); - nir_pop_if(b, nif); + nir_push_else(b, nif); nir_jump(b, nir_jump_break); + nir_pop_loop(b, loop); result = nir_load_var(b, result_var); From 97b51db9dbcc9358ed3d8721828c28e5130ba563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Feb 2026 11:25:44 +0100 Subject: [PATCH 07/15] dozen: add and lower loop continue construct for dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(). We are going to disallow continue statements without loop continue constructs. --- src/microsoft/vulkan/dzn_nir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/microsoft/vulkan/dzn_nir.c b/src/microsoft/vulkan/dzn_nir.c index 2c86774cde4..196e82a88a4 100644 --- a/src/microsoft/vulkan/dzn_nir.c +++ b/src/microsoft/vulkan/dzn_nir.c @@ -374,7 +374,8 @@ dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size) * TODO: Might be a good thing to use use the CL compiler we have and turn * those shaders into CL kernels. 
*/ - nir_push_loop(&b); + nir_loop *loop = nir_push_loop(&b); + nir_loop_add_continue_construct(loop); old_index_ptr = nir_load_var(&b, old_index_ptr_var); nir_def *index0 = nir_load_var(&b, index0_var); @@ -445,6 +446,8 @@ dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size) new_index_count_ptr_desc, nir_imm_int(&b, 0), .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4); + nir_lower_continue_constructs(b.shader); + return b.shader; } From 10b61b3f68706bef76f4e93d4af5d782f695b09c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Feb 2026 11:33:00 +0100 Subject: [PATCH 08/15] nir/lower_goto_ifs: Add and lower loop continue constructs We are going to disallow continue statements without loop continue constructs. --- src/compiler/nir/nir_lower_goto_ifs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_lower_goto_ifs.c b/src/compiler/nir/nir_lower_goto_ifs.c index c83c8ef78b3..c0b5213b547 100644 --- a/src/compiler/nir/nir_lower_goto_ifs.c +++ b/src/compiler/nir/nir_lower_goto_ifs.c @@ -346,7 +346,9 @@ loop_routing_start(struct routes *routing, nir_builder *b, routing->brk.fork = fork; routing->brk.reachable = fork_reachable(fork); } - nir_push_loop(b); + + nir_loop *loop = nir_push_loop(b); + nir_loop_add_continue_construct(loop); } /** @@ -978,5 +980,8 @@ nir_lower_goto_ifs(nir_shader *shader) progress = true; } + if (progress) + nir_lower_continue_constructs(shader); + return progress; } From 96f1645cfe8d673027b5a6db185c68fcaff74f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Feb 2026 11:49:40 +0100 Subject: [PATCH 09/15] ac: add and lower loop continue construct for streamout buffer info loop We are going to disallow continue statements without loop continue constructs. 
--- src/amd/common/nir/ac_nir_lower_ngg.c | 2 ++ src/amd/common/nir/ac_nir_lower_ngg_gs.c | 2 ++ src/amd/common/nir/ac_nir_prerast_utils.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/amd/common/nir/ac_nir_lower_ngg.c b/src/amd/common/nir/ac_nir_lower_ngg.c index 5a54bc7365e..0581f9d97cd 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg.c +++ b/src/amd/common/nir/ac_nir_lower_ngg.c @@ -1770,6 +1770,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option nir_validate_shader(shader, "after emitting NGG VS/TES"); /* Cleanup */ + if (state.streamout_enabled) + nir_lower_continue_constructs(shader); nir_opt_dead_write_vars(shader); nir_lower_vars_to_ssa(shader); nir_remove_dead_variables(shader, nir_var_function_temp, NULL); diff --git a/src/amd/common/nir/ac_nir_lower_ngg_gs.c b/src/amd/common/nir/ac_nir_lower_ngg_gs.c index 5a676b5a243..dc128c9eebb 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_gs.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_gs.c @@ -925,6 +925,8 @@ ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options, nir_validate_shader(shader, "after emitting NGG GS"); /* Cleanup */ + if (state.streamout_enabled) + nir_lower_continue_constructs(shader); nir_lower_vars_to_ssa(shader); nir_remove_dead_variables(shader, nir_var_function_temp, NULL); diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index 22a9fb56f48..08b56b025d2 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -1050,6 +1050,8 @@ ac_nir_ngg_build_streamout_buffer_info(nir_builder *b, nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + for (unsigned i = 0; i < NUM_ATOMICS_IN_FLIGHT; i++) { int issue_index = (NUM_ATOMICS_IN_FLIGHT - 1 + i) % NUM_ATOMICS_IN_FLIGHT; int read_index = i; From 16f13564a82edeb60ec71c62663d0b526788dc8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 
11 Feb 2026 11:55:26 +0100 Subject: [PATCH 10/15] tu/rt: add and lower loop continue construct in traversal shaders We are going to disallow continue statements without loop continue constructs. --- src/freedreno/vulkan/tu_nir_lower_ray_query.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc index 28f27a0e474..97657ba33ed 100644 --- a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc +++ b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc @@ -10,6 +10,7 @@ #include "compiler/spirv/spirv.h" #include "nir_builder.h" +#include "nir_control_flow.h" #include "nir_deref.h" enum rq_intersection_var_index { @@ -560,8 +561,10 @@ build_ray_traversal(nir_builder *b, nir_deref_instr *rq, nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); nir_store_var(b, incomplete, nir_imm_true(b), 0x1); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + /* Go up the stack if current_node == VK_BVH_INVALID_NODE */ nir_push_if(b, nir_ieq_imm(b, rq_load(b, rq, current_node), VK_BVH_INVALID_NODE)); { @@ -928,7 +931,7 @@ build_ray_traversal(nir_builder *b, nir_deref_instr *rq, } nir_pop_if(b, NULL); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); return nir_load_var(b, incomplete); } @@ -1035,6 +1038,9 @@ tu_nir_lower_ray_queries(nir_shader *shader) ralloc_free(query_ht); + if (progress) + nir_lower_continue_constructs(shader); + return progress; } From c67d9ec4d63f95c087debac168b564b850b85f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Feb 2026 11:59:46 +0100 Subject: [PATCH 11/15] lavapipe/rt: add and lower loop continue construct in traversal shaders We are going to disallow continue statements without loop continue constructs. 
--- src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c | 1 + .../frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c | 1 + src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c b/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c index 098c39a40d1..c924092b366 100644 --- a/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c +++ b/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c @@ -1083,6 +1083,7 @@ lvp_compile_ray_tracing_pipeline(struct lvp_pipeline *pipeline, nir_shader_instructions_pass(b->shader, lvp_lower_ray_tracing_instr, nir_metadata_none, &compiler); + NIR_PASS(_, b->shader, nir_lower_continue_constructs); NIR_PASS(_, b->shader, nir_lower_returns); const struct nir_lower_compute_system_values_options compute_system_values = {0}; diff --git a/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c index cfe7d78e351..e16e587cab9 100644 --- a/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c +++ b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c @@ -655,6 +655,7 @@ lvp_nir_lower_ray_queries(struct nir_shader *shader) ralloc_free(query_ht); if (progress) { + NIR_PASS(_, shader, nir_lower_continue_constructs); NIR_PASS(_, shader, nir_lower_global_vars_to_local); NIR_PASS(_, shader, nir_lower_vars_to_ssa); diff --git a/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c b/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c index c06a6b263b8..b752a0503a0 100644 --- a/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c +++ b/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c @@ -497,8 +497,10 @@ lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *arg .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0), }; - 
nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), LVP_BVH_INVALID_NODE)); { nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.stack_ptr), 0)); @@ -607,7 +609,7 @@ lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *arg } nir_pop_if(b, NULL); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); return nir_load_var(b, incomplete); } From 5988edb1a18459704bae7603823329138f2dccdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Feb 2026 13:40:12 +0100 Subject: [PATCH 12/15] aco/tests: add and lower loop continue constructs in all tests which use continues We are going to disallow continue statements without loop continue constructs. --- src/amd/compiler/tests/helpers.cpp | 1 + src/amd/compiler/tests/test_isel.cpp | 244 +++++++++++++++++++-------- 2 files changed, 172 insertions(+), 73 deletions(-) diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index f56c6f99111..64a51aba65a 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -410,6 +410,7 @@ void finish_isel_test(enum ac_hw_stage hw_stage, unsigned wave_size) { nir_validate_shader(nb->shader, "in finish_isel_test"); + nir_lower_continue_constructs(nb->shader); program.reset(new Program); program->debug.func = nullptr; diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index eab9baabc28..6b46a058fe3 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -376,6 +376,19 @@ END_TEST * // unreachable block * break; * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (uniform) { + * cont = true; + * } else { + * cont = true; + * } + * if (false) { + * break; + * } + * } */ BEGIN_TEST(isel.cf.unreachable_break.uniform_continue) if 
(!setup_nir_cs(GFX11)) @@ -388,46 +401,49 @@ BEGIN_TEST(isel.cf.unreachable_break.uniform_continue) //>> s3: %val1 = p_create_vector 0, 0, 0 //>> s1: %val0 = p_parallelcopy 0 - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { //>> BB1 - //! /* logical preds: BB0, BB2, BB5, / linear preds: BB0, BB2, BB5, / kind: uniform, loop-header, */ + //! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: uniform, loop-header, */ nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2)); { //>> BB2 - //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, continue, */ + //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */ nir_jump(nb, nir_jump_continue); } nir_push_else(nb, NULL); { - /* The contents of this branch is moved to the merge block, and a dummy break is inserted - * before the continue so that the loop has an exit. - */ //>> BB3 //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */ //! p_logical_start //! s1: %_ = p_unit_test 5 - //! s2: %zero = p_parallelcopy 0 - //! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec - //! p_logical_end - //! p_cbranch_z %cond:scc - //! BB4 - //! /* logical preds: BB3, / linear preds: BB3, / kind: uniform, break, */ - //>> BB5 - //! /* logical preds: BB3, / linear preds: BB3, / kind: uniform, continue, */ nir_unit_test_uniform_input(nb, 1, 32, .base=5); nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - + /* The unreachable break is removed when lowering the continues. However, + * a dummy break is inserted, so that the loop has an exit. + */ + //>> BB4 + //! /* logical preds: BB2, BB3, / linear preds: BB2, BB3, / kind: uniform, */ + //! p_logical_start + //! s2: %zero = p_parallelcopy 0 + //! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec + //! p_logical_end + //! p_cbranch_z %cond:scc + //! BB5 + //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, break, */ + //>> BB6 + //! 
/* logical preds: BB4, / linear preds: BB4, / kind: uniform, continue, */ val0 = nir_imm_zero(nb, 1, 32); val1 = nir_load_local_invocation_id(nb); nir_jump(nb, nir_jump_break); } - nir_pop_loop(nb, NULL); - //>> BB6 - //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, top-level, loop-exit, */ + nir_pop_loop(nb, loop); + //>> BB7 + //! /* logical preds: BB5, / linear preds: BB5, / kind: uniform, top-level, loop-exit, */ //>> p_unit_test 0, %val0 //! p_unit_test 1, %val1 @@ -645,7 +661,8 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { /* A dummy break is inserted before the continue so that the loop has an exit. */ //>> BB1 @@ -661,7 +678,7 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit) nir_unit_test_uniform_input(nb, 1, 32, .base=0); nir_jump(nb, nir_jump_continue); } - nir_pop_loop(nb, NULL); + nir_pop_loop(nb, loop); finish_isel_test(); END_TEST @@ -720,19 +737,40 @@ END_TEST * } * use(val); * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (divergent) { + * } else { + * if (uniform) { + * break; + * } + * val = uniform; + * use(val); + * } + * } */ BEGIN_TEST(isel.cf.uniform_if_branch_use) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { + //>> BB1 + //! /* logical preds: BB0, BB15, / linear preds: BB0, BB15, / kind: loop-header, branch, */ + //>> s2: %_ = p_unit_test 3 nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base=3)); { nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); + //>> BB4 + //! /* logical preds: / linear preds: BB2, BB3, / kind: invert, */ + //>> BB5 + //! /* logical preds: BB1, / linear preds: BB4, / kind: uniform, */ //>> s2: %cond = p_unit_test 2 //! s2: %_, s1: %_:scc = s_and_b64 %cond, %0:exec //! 
p_logical_end @@ -740,31 +778,31 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use) nir_def *val; nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2)); { - //>> BB7 - //! /* logical preds: BB6, / linear preds: BB6, / kind: break, */ + //>> BB6 + //! /* logical preds: BB5, / linear preds: BB5, / kind: break, */ nir_jump(nb, nir_jump_break); } nir_push_else(nb, NULL); { /* The contents of this branch is moved to the merge block. */ - //>> BB11 - //! /* logical preds: BB10, / linear preds: BB9, BB10, / kind: uniform, */ + //>> BB10 + //! /* logical preds: BB9, / linear preds: BB8, BB9, / kind: uniform, */ //>> p_cbranch_z %0:exec rarely_taken - //! BB12 - //! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */ + //! BB11 + //! /* logical preds: BB10, / linear preds: BB10, / kind: uniform, */ //! p_logical_start //! s1: %val = p_unit_test 0 + //! p_unit_test 1, %val val = nir_unit_test_uniform_input(nb, 1, 32, .base=0); } nir_pop_if(nb, NULL); - //! p_unit_test 1, %val nir_unit_test_output(nb, val, .base=1); - //>> BB14 - //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */ + //>> BB15 + //! /* logical preds: BB2, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */ } - nir_pop_loop(nb, NULL); + nir_pop_loop(nb, loop); finish_isel_test(); END_TEST @@ -780,6 +818,17 @@ END_TEST * d = c or undef * break * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * b = ... + * loop { + * a = linear_phi b, c + * if (!divergent) { + * break + * } + * c = ... + * } */ BEGIN_TEST(isel.cf.hidden_continue) if (!setup_nir_cs(GFX11)) @@ -789,35 +838,38 @@ BEGIN_TEST(isel.cf.hidden_continue) nir_def* init = nir_unit_test_uniform_input(nb, 1, 32, .base = 0); nir_phi_instr* phi; - nir_loop* loop = nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { //>> BB1 - //! 
/* logical preds: BB0, BB2, / linear preds: BB0, BB3, BB8, / kind: loop-header, branch, */ - //! s1: %2 = p_linear_phi %init, %cont, %phi + //! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: loop-header, branch, */ + //! s1: %2 = p_linear_phi %init, %cont phi = nir_phi_instr_create(nb->shader); nir_def_init(&phi->instr, &phi->def, 1, 32); nir_phi_instr_add_src(phi, nir_def_block(init), init); + //>> s2: %cond = p_unit_test 4 + //! s2: %inverse_cond, s1: %_:scc = s_not_b64 %cond + //>> p_cbranch_z %inverse_cond + //>> BB2 + //! /* logical preds: BB1, / linear preds: BB1, / kind: break, */ nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 4)); { - //>> BB2 - //! /* logical preds: BB1, / linear preds: BB1, / kind: continue, */ + //>> BB6 + //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, continue, merge, */ //! p_logical_start //! s1: %cont = p_unit_test 1 nir_def* cont = nir_unit_test_uniform_input(nb, 1, 32, .base = 1); - nir_phi_instr_add_src(phi, nir_def_block(cont), cont); + nir_phi_instr_add_src(phi, nir_loop_first_continue_block(loop), cont); nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - //>> BB6 - //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: break, merge, */ - //! s1: %phi = p_linear_phi %cont, s1: undef - //>> BB8 - //! /* logical preds: / linear preds: BB6, / kind: uniform, continue, */ nir_jump(nb, nir_jump_break); } - nir_pop_loop(nb, NULL); + //>> BB7 + //! 
/* logical preds: BB2, / linear preds: BB3, / kind: uniform, top-level, loop-exit, */ + nir_pop_loop(nb, loop); nb->cursor = nir_after_phis(nir_loop_first_block(loop)); nir_builder_instr_insert(nb, &phi->instr); @@ -1191,15 +1243,35 @@ END_TEST * if (divergent) { * continue * } + * unit_test 3 * //potentially empty * } + * unit_test 4 + * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (divergent) { + * if (divergent) { + * cont = true + * } else { + * unit_test 3 + * //potentially empty + * } + * } + * if (cont) { + * } else { + * unit_test 4 + * } * } */ BEGIN_TEST(isel.cf.empty_exec.loop_continue) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { nir_break_if(nb, nir_imm_false(nb)); @@ -1214,28 +1286,39 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue) nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 2)); { //>> BB5 - //>> /* logical preds: BB4, / linear preds: BB4, / kind: continue, */ + //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, */ + //>> s2: %_ = p_parallelcopy -1 + //>> s2: %cont1 = p_parallelcopy %0:exec nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - //>> BB9 - //! /* logical preds: BB4, / linear preds: BB7, BB8, / kind: uniform, merge, */ - - //>> p_cbranch_z %0:exec rarely_taken - //>> BB10 + //>> BB8 + //! /* logical preds: BB4, / linear preds: BB7, / kind: uniform, */ //>> p_unit_test 3, %_ + + //>> BB10 + //! /* logical preds: BB5, BB8, / linear preds: BB8, BB9, / kind: uniform, merge, */ + //! s2: %cont2 = p_linear_phi %cont1, %cont1 nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 3); } nir_pop_if(nb, NULL); - //>> BB17 - //! /* logical preds: BB12, BB15, / linear preds: BB15, BB16, / kind: uniform, continue, merge, */ - //! p_logical_start - - //! p_unit_test 4, %_ + //>> BB12 + //! /* logical preds: / linear preds: BB10, BB11, / kind: invert, */ + //! 
s2: %tmp = p_linear_phi %cont2, s2: undef + //! s2: %cont3, s1: %16:scc = s_and_b64 %tmp, %0:exec + //>> BB15 + //! /* logical preds: BB10, BB13, / linear preds: BB13, BB14, / kind: branch, merge, */ + //! s2: %cont = p_linear_phi %cont3, %cont3 + //>> p_cbranch_z %cont + //>> BB19 + //! /* logical preds: BB15, / linear preds: BB18, / kind: uniform, */ + //>> p_unit_test 4, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4); + //>> BB21 + //! /* logical preds: BB16, BB19, / linear preds: BB19, BB20, / kind: uniform, continue, merge, */ } - nir_pop_loop(nb, NULL); - //>> BB18 + nir_pop_loop(nb, loop); + //>> BB22 //! /* logical preds: BB2, / linear preds: BB2, / kind: uniform, top-level, loop-exit, */ //! p_logical_start @@ -1255,15 +1338,28 @@ END_TEST * } * //potentially empty * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (divergent) { + * } else { + * if (divergent) { + * break + * } + * //potentially empty + * } + * } */ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { //>> BB1 - //! /* logical preds: BB0, BB2, BB14, / linear preds: BB0, BB3, BB14, / kind: loop-header, branch, */ + //! /* logical preds: BB0, BB15, / linear preds: BB0, BB15, / kind: loop-header, branch, */ //>> p_unit_test 0, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 0); @@ -1271,35 +1367,37 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break) nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 1)); { //>> BB2 - //! /* logical preds: BB1, / linear preds: BB1, / kind: continue, */ + //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */ nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - //>> BB6 - //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: branch, merge, */ + //>> BB4 + //! 
/* logical preds: / linear preds: BB2, BB3, / kind: invert, */ + //>> BB5 + //! /* logical preds: BB1, / linear preds: BB4, / kind: branch, */ //>> p_unit_test 2, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 2); //>> s2: %_ = p_unit_test 3 - //>> BB7 - //! /* logical preds: BB6, / linear preds: BB6, / kind: break, */ + //>> BB6 + //! /* logical preds: BB5, / linear preds: BB5, / kind: break, */ nir_break_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 3)); - //>> BB11 - //! /* logical preds: BB6, / linear preds: BB9, BB10, / kind: uniform, merge, */ + //>> BB10 + //! /* logical preds: BB5, / linear preds: BB8, BB9, / kind: uniform, merge, */ //>> p_cbranch_z %0:exec rarely_taken - //>> BB12 - //! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */ + //>> BB11 + //! /* logical preds: BB10, / linear preds: BB10, / kind: uniform, */ //>> p_unit_test 4, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4); - //>> BB14 - //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */ + //>> BB15 + //! /* logical preds: BB2, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */ } - nir_pop_loop(nb, NULL); - //>> BB15 - //! /* logical preds: BB7, / linear preds: BB8, / kind: uniform, top-level, loop-exit, */ + nir_pop_loop(nb, loop); + //>> BB16 + //! /* logical preds: BB6, / linear preds: BB7, / kind: uniform, top-level, loop-exit, */ //! p_logical_start //! p_unit_test 5, %_ From e1ea89150355761729901bf3aa8ede582e1935d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Tue, 17 Feb 2026 09:52:35 +0100 Subject: [PATCH 13/15] nir/tests: change opt_loop_peel_initial_break test to not use nir_jump_continue We are going to disallow continue statements without loop continue constructs. Replaced with a test that checks that the optimization is not applied in absence of actual work after the conditional break.
--- src/compiler/nir/tests/opt_loop_tests.cpp | 49 ++++++++++------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/src/compiler/nir/tests/opt_loop_tests.cpp b/src/compiler/nir/tests/opt_loop_tests.cpp index affa612d3c7..ebf9141ad43 100644 --- a/src/compiler/nir/tests/opt_loop_tests.cpp +++ b/src/compiler/nir/tests/opt_loop_tests.cpp @@ -565,19 +565,20 @@ TEST_F(nir_opt_loop_test, opt_loop_merge_terminators_skip_merge_if_phis_nested_l )")); } -TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_ends_with_jump) +TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_no_work) { + nir_variable *var = nir_variable_create(b->shader, nir_var_shader_temp, + glsl_int_type(), "dummy_var"); + nir_loop *loop = nir_push_loop(b); + /* do_work1() */ + nir_store_var(b, var, nir_imm_int(b, 0), 1); + /* the break we want to move down: */ nir_break_if(b, nir_imm_true(b)); - /* do_work_2: */ - nir_push_if(b, nir_imm_true(b)); - nir_jump(b, nir_jump_continue); - nir_pop_if(b, NULL); - nir_jump(b, nir_jump_return); - + /* No work afterwards. 
*/ nir_pop_loop(b, loop); ASSERT_FALSE(nir_opt_loop(b->shader)); @@ -593,40 +594,32 @@ TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_ends_with_jump) decl_var shader_out INTERP_MODE_NONE none int out (FRAG_RESULT_DEPTH.x, 0, 0) decl_var ubo INTERP_MODE_NONE none int ubo1 (0, 0, 0) decl_var ubo INTERP_MODE_NONE none int[4] ubo_array (0, 0, 0) + decl_var INTERP_MODE_NONE none int dummy_var decl_function main () (entrypoint) impl main { block b0: // preds: 32 %0 = deref_var &in (shader_in int) 32 %1 = @load_deref (%0) (access=none) - // succs: b1 + // succs: b1 loop { - block b1: // preds: b0 b5 - 1 %2 = load_const (true) - // succs: b2 b3 - if %2 (true) { + block b1: // preds: b0 b4 + 32 %2 = load_const (0x00000000) + 32 %3 = deref_var &dummy_var (shader_temp int) + @store_deref (%3, %2 (0x0)) (wrmask=x, access=none) + 1 %4 = load_const (true) + // succs: b2 b3 + if %4 (true) { block b2:// preds: b1 break - // succs: b8 + // succs: b5 } else { block b3: // preds: b1, succs: b4 } - block b4: // preds: b3 - 1 %3 = load_const (true) - // succs: b5 b6 - if %3 (true) { - block b5:// preds: b4 - continue - // succs: b1 - } else { - block b6: // preds: b4, succs: b7 - } - block b7:// preds: b6 - return - // succs: b9 + block b4: // preds: b3, succs: b1 } - block b8: // preds: b2, succs: b9 - block b9: + block b5: // preds: b2, succs: b6 + block b6: } )")); } From c5dde3c2c0521c7d57f5bf81f334be160d72f563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 9 Feb 2026 15:02:32 +0100 Subject: [PATCH 14/15] nir: ensure that loop continue statements always link to continue constructs --- src/compiler/glsl/glsl_to_nir.cpp | 7 ++++--- src/compiler/nir/nir_builder.c | 2 -- src/compiler/nir/nir_control_flow.c | 9 +++++---- src/compiler/spirv/vtn_structured_cfg.c | 3 +++ 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 306fd66126d..e5cc502d849 100644 --- 
a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -789,11 +789,12 @@ nir_visitor::visit(ir_function_signature *ir) void nir_visitor::visit(ir_loop *ir) { - nir_push_loop(&b); + nir_loop *loop = nir_push_loop(&b); + nir_loop_add_continue_construct(loop); visit_exec_list(&ir->body_instructions, this); - nir_push_continue(&b, NULL); + nir_push_continue(&b, loop); visit_exec_list(&ir->continue_instructions, this); - nir_pop_loop(&b, NULL); + nir_pop_loop(&b, loop); } void diff --git a/src/compiler/nir/nir_builder.c b/src/compiler/nir/nir_builder.c index fad83b190b5..e1321faed51 100644 --- a/src/compiler/nir/nir_builder.c +++ b/src/compiler/nir/nir_builder.c @@ -568,8 +568,6 @@ nir_push_continue(nir_builder *build, nir_loop *loop) loop = nir_cf_node_as_loop(block->cf_node.parent); } - nir_loop_add_continue_construct(loop); - build->cursor = nir_before_cf_list(&loop->continue_list); return loop; } diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c index 6156db77f06..ab7f151387b 100644 --- a/src/compiler/nir/nir_control_flow.c +++ b/src/compiler/nir/nir_control_flow.c @@ -278,10 +278,9 @@ block_add_normal_succs(nir_block *block) nir_loop *loop = nir_cf_node_as_loop(parent); nir_block *cont_block; - if (block == nir_loop_last_block(loop)) { - cont_block = nir_loop_continue_target(loop); + if (block == nir_loop_last_block(loop) && nir_loop_has_continue_construct(loop)) { + cont_block = nir_loop_first_continue_block(loop); } else { - assert(block == nir_loop_last_continue_block(loop)); cont_block = nir_loop_first_block(loop); } @@ -438,6 +437,7 @@ nir_loop_add_continue_construct(nir_loop *loop) /* change predecessors and successors */ nir_block *header = nir_loop_first_block(loop); nir_block *preheader = nir_block_cf_tree_prev(header); + assert(header->predecessors.entries <= 2); set_foreach(&header->predecessors, entry) { nir_block *pred = (nir_block *)entry->key; if (pred != preheader) @@ -455,6 +455,7 @@ 
nir_loop_remove_continue_construct(nir_loop *loop) /* change predecessors and successors */ nir_block *header = nir_loop_first_block(loop); nir_block *cont = nir_loop_first_continue_block(loop); + assert(cont->predecessors.entries <= 2); set_foreach(&cont->predecessors, entry) { nir_block *pred = (nir_block *)entry->key; replace_successor(pred, cont, header); @@ -513,7 +514,7 @@ nir_handle_add_jump(nir_block *block) case nir_jump_continue: { nir_loop *loop = nearest_loop(&block->cf_node); - nir_block *cont_block = nir_loop_continue_target(loop); + nir_block *cont_block = nir_loop_first_continue_block(loop); link_blocks(block, cont_block, NULL); break; } diff --git a/src/compiler/spirv/vtn_structured_cfg.c b/src/compiler/spirv/vtn_structured_cfg.c index af8cdd85f83..a1fdc84172a 100644 --- a/src/compiler/spirv/vtn_structured_cfg.c +++ b/src/compiler/spirv/vtn_structured_cfg.c @@ -1676,6 +1676,9 @@ vtn_emit_cf_func_structured(struct vtn_builder *b, struct vtn_function *func, next->nloop = nir_push_loop(&b->nb); nir_store_var(&b->nb, next->continue_var, nir_imm_false(&b->nb), 1); + if (!vtn_is_single_block_loop(next)) + nir_loop_add_continue_construct(next->nloop); + next->nloop->control = vtn_loop_control(b, block->merge[3]); break; From 3297557c7413eb4ebc501c61c243459ac97d5f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 9 Feb 2026 15:03:12 +0100 Subject: [PATCH 15/15] nir: validate that loop continue statements always link to continue constructs --- src/compiler/nir/nir.h | 12 ------------ src/compiler/nir/nir_validate.c | 21 ++++++++++++--------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 9aba4055fbb..80188c697bc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3815,18 +3815,6 @@ nir_loop_last_continue_block(nir_loop *loop) return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); } -/** - * Return the target block of a 
nir_jump_continue statement - */ -static inline nir_block * -nir_loop_continue_target(nir_loop *loop) -{ - if (nir_loop_has_continue_construct(loop)) - return nir_loop_first_continue_block(loop); - else - return nir_loop_first_block(loop); -} - /** * Return true if this list of cf_nodes contains a single empty block. */ diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index fa45b5d9553..068e936df85 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -1244,8 +1244,11 @@ validate_jump_instr(nir_jump_instr *instr, validate_state *state) validate_assert(state, state->impl->structured); validate_assert(state, state->loop != NULL); if (state->loop) { - nir_block *cont_block = nir_loop_continue_target(state->loop); - validate_assert(state, block->successors[0] == cont_block); + validate_assert(state, nir_loop_has_continue_construct(state->loop)); + if (nir_loop_has_continue_construct(state->loop)) { + nir_block *cont_block = nir_loop_first_continue_block(state->loop); + validate_assert(state, block->successors[0] == cont_block); + } } validate_assert(state, block->successors[1] == NULL); validate_assert(state, instr->target == NULL); @@ -1496,14 +1499,13 @@ validate_block(nir_block *block, validate_state *state) if (next == NULL) { switch (state->parent_node->type) { case nir_cf_node_loop: { - if (block == nir_loop_last_block(state->loop)) { - nir_block *cont = nir_loop_continue_target(state->loop); - validate_assert(state, block->successors[0] == cont); + if (!nir_loop_has_continue_construct(state->loop) || + block == nir_loop_last_continue_block(state->loop)) { + nir_block *header = nir_loop_first_block(state->loop); + validate_assert(state, block->successors[0] == header); } else { - validate_assert(state, nir_loop_has_continue_construct(state->loop) && - block == nir_loop_last_continue_block(state->loop)); - nir_block *head = nir_loop_first_block(state->loop); - validate_assert(state, block->successors[0] 
== head); + nir_block *cont = nir_loop_first_continue_block(state->loop); + validate_assert(state, block->successors[0] == cont); } /* due to the hack for infinite loops, block->successors[1] may * point to the block after the loop. @@ -1610,6 +1612,7 @@ validate_loop(nir_loop *loop, validate_state *state) validate_assert(state, next_node->type == nir_cf_node_block); validate_assert(state, !exec_list_is_empty(&loop->body)); + validate_assert(state, nir_loop_first_block(loop)->predecessors.entries <= 2); nir_cf_node *old_parent = state->parent_node; state->parent_node = &loop->cf_node;