Merge branch 'nir_loop_continue' into 'main'

nir: Remove nir_jump_continue

See merge request mesa/mesa!39942
This commit is contained in:
Daniel Schürmann 2026-03-11 04:57:49 +00:00
commit bbc0dff0df
25 changed files with 487 additions and 220 deletions

View file

@ -1768,6 +1768,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
nir_validate_shader(shader, "after emitting NGG VS/TES");
/* Cleanup */
if (state.streamout_enabled)
nir_lower_continue_constructs(shader);
nir_opt_dead_write_vars(shader);
nir_lower_vars_to_ssa(shader);
nir_remove_dead_variables(shader, nir_var_function_temp, NULL);

View file

@ -929,6 +929,8 @@ ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options,
nir_validate_shader(shader, "after emitting NGG GS");
/* Cleanup */
if (state.streamout_enabled)
nir_lower_continue_constructs(shader);
nir_lower_vars_to_ssa(shader);
nir_remove_dead_variables(shader, nir_var_function_temp, NULL);

View file

@ -1050,6 +1050,8 @@ ac_nir_ngg_build_streamout_buffer_info(nir_builder *b,
nir_loop *loop = nir_push_loop(b);
{
nir_loop_add_continue_construct(loop);
for (unsigned i = 0; i < NUM_ATOMICS_IN_FLIGHT; i++) {
int issue_index = (NUM_ATOMICS_IN_FLIGHT - 1 + i) % NUM_ATOMICS_IN_FLIGHT;
int read_index = i;

View file

@ -410,6 +410,7 @@ void
finish_isel_test(enum ac_hw_stage hw_stage, unsigned wave_size)
{
nir_validate_shader(nb->shader, "in finish_isel_test");
nir_lower_continue_constructs(nb->shader);
program.reset(new Program);
program->debug.func = nullptr;

View file

@ -376,6 +376,19 @@ END_TEST
* // unreachable block
* break;
* }
*
* after nir_lower_continue_constructs() and sanitize_if():
*
* loop {
* if (uniform) {
* cont = true;
* } else {
* cont = true;
* }
* if (false) {
* break;
* }
* }
*/
BEGIN_TEST(isel.cf.unreachable_break.uniform_continue)
if (!setup_nir_cs(GFX11))
@ -388,46 +401,49 @@ BEGIN_TEST(isel.cf.unreachable_break.uniform_continue)
//>> s3: %val1 = p_create_vector 0, 0, 0
//>> s1: %val0 = p_parallelcopy 0
nir_push_loop(nb);
nir_loop *loop = nir_push_loop(nb);
nir_loop_add_continue_construct(loop);
{
//>> BB1
//! /* logical preds: BB0, BB2, BB5, / linear preds: BB0, BB2, BB5, / kind: uniform, loop-header, */
//! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: uniform, loop-header, */
nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2));
{
//>> BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, continue, */
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */
nir_jump(nb, nir_jump_continue);
}
nir_push_else(nb, NULL);
{
/* The contents of this branch is moved to the merge block, and a dummy break is inserted
* before the continue so that the loop has an exit.
*/
//>> BB3
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */
//! p_logical_start
//! s1: %_ = p_unit_test 5
//! s2: %zero = p_parallelcopy 0
//! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec
//! p_logical_end
//! p_cbranch_z %cond:scc
//! BB4
//! /* logical preds: BB3, / linear preds: BB3, / kind: uniform, break, */
//>> BB5
//! /* logical preds: BB3, / linear preds: BB3, / kind: uniform, continue, */
nir_unit_test_uniform_input(nb, 1, 32, .base=5);
nir_jump(nb, nir_jump_continue);
}
nir_pop_if(nb, NULL);
/* The unreachable break is removed when lowering the continues. However,
* a dummy break is inserted, so that the loop has an exit.
*/
//>> BB4
//! /* logical preds: BB2, BB3, / linear preds: BB2, BB3, / kind: uniform, */
//! p_logical_start
//! s2: %zero = p_parallelcopy 0
//! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec
//! p_logical_end
//! p_cbranch_z %cond:scc
//! BB5
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, break, */
//>> BB6
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, continue, */
val0 = nir_imm_zero(nb, 1, 32);
val1 = nir_load_local_invocation_id(nb);
nir_jump(nb, nir_jump_break);
}
nir_pop_loop(nb, NULL);
//>> BB6
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, top-level, loop-exit, */
nir_pop_loop(nb, loop);
//>> BB7
//! /* logical preds: BB5, / linear preds: BB5, / kind: uniform, top-level, loop-exit, */
//>> p_unit_test 0, %val0
//! p_unit_test 1, %val1
@ -645,7 +661,8 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit)
if (!setup_nir_cs(GFX11))
return;
nir_push_loop(nb);
nir_loop *loop = nir_push_loop(nb);
nir_loop_add_continue_construct(loop);
{
/* A dummy break is inserted before the continue so that the loop has an exit. */
//>> BB1
@ -661,7 +678,7 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit)
nir_unit_test_uniform_input(nb, 1, 32, .base=0);
nir_jump(nb, nir_jump_continue);
}
nir_pop_loop(nb, NULL);
nir_pop_loop(nb, loop);
finish_isel_test();
END_TEST
@ -720,19 +737,40 @@ END_TEST
* }
* use(val);
* }
*
* after nir_lower_continue_constructs() and sanitize_if():
*
* loop {
* if (divergent) {
* } else {
* if (uniform) {
* break;
* }
* val = uniform;
* use(val);
* }
* }
*/
BEGIN_TEST(isel.cf.uniform_if_branch_use)
if (!setup_nir_cs(GFX11))
return;
nir_push_loop(nb);
nir_loop *loop = nir_push_loop(nb);
nir_loop_add_continue_construct(loop);
{
//>> BB1
//! /* logical preds: BB0, BB15, / linear preds: BB0, BB15, / kind: loop-header, branch, */
//>> s2: %_ = p_unit_test 3
nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base=3));
{
nir_jump(nb, nir_jump_continue);
}
nir_pop_if(nb, NULL);
//>> BB4
//! /* logical preds: / linear preds: BB2, BB3, / kind: invert, */
//>> BB5
//! /* logical preds: BB1, / linear preds: BB4, / kind: uniform, */
//>> s2: %cond = p_unit_test 2
//! s2: %_, s1: %_:scc = s_and_b64 %cond, %0:exec
//! p_logical_end
@ -740,31 +778,31 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use)
nir_def *val;
nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2));
{
//>> BB7
//! /* logical preds: BB6, / linear preds: BB6, / kind: break, */
//>> BB6
//! /* logical preds: BB5, / linear preds: BB5, / kind: break, */
nir_jump(nb, nir_jump_break);
}
nir_push_else(nb, NULL);
{
/* The contents of this branch are moved to the merge block. */
//>> BB11
//! /* logical preds: BB10, / linear preds: BB9, BB10, / kind: uniform, */
//>> BB10
//! /* logical preds: BB9, / linear preds: BB8, BB9, / kind: uniform, */
//>> p_cbranch_z %0:exec rarely_taken
//! BB12
//! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */
//! BB11
//! /* logical preds: BB10, / linear preds: BB10, / kind: uniform, */
//! p_logical_start
//! s1: %val = p_unit_test 0
//! p_unit_test 1, %val
val = nir_unit_test_uniform_input(nb, 1, 32, .base=0);
}
nir_pop_if(nb, NULL);
//! p_unit_test 1, %val
nir_unit_test_output(nb, val, .base=1);
//>> BB14
//! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */
//>> BB15
//! /* logical preds: BB2, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */
}
nir_pop_loop(nb, NULL);
nir_pop_loop(nb, loop);
finish_isel_test();
END_TEST
@ -780,6 +818,17 @@ END_TEST
* d = c or undef
* break
* }
*
* after nir_lower_continue_constructs() and sanitize_if():
*
* b = ...
* loop {
* a = linear_phi b, c
* if (!divergent) {
* break
* }
* c = ...
* }
*/
BEGIN_TEST(isel.cf.hidden_continue)
if (!setup_nir_cs(GFX11))
@ -789,35 +838,38 @@ BEGIN_TEST(isel.cf.hidden_continue)
nir_def* init = nir_unit_test_uniform_input(nb, 1, 32, .base = 0);
nir_phi_instr* phi;
nir_loop* loop = nir_push_loop(nb);
nir_loop *loop = nir_push_loop(nb);
nir_loop_add_continue_construct(loop);
{
//>> BB1
//! /* logical preds: BB0, BB2, / linear preds: BB0, BB3, BB8, / kind: loop-header, branch, */
//! s1: %2 = p_linear_phi %init, %cont, %phi
//! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: loop-header, branch, */
//! s1: %2 = p_linear_phi %init, %cont
phi = nir_phi_instr_create(nb->shader);
nir_def_init(&phi->instr, &phi->def, 1, 32);
nir_phi_instr_add_src(phi, nir_def_block(init), init);
//>> s2: %cond = p_unit_test 4
//! s2: %inverse_cond, s1: %_:scc = s_not_b64 %cond
//>> p_cbranch_z %inverse_cond
//>> BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: break, */
nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 4));
{
//>> BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: continue, */
//>> BB6
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, continue, merge, */
//! p_logical_start
//! s1: %cont = p_unit_test 1
nir_def* cont = nir_unit_test_uniform_input(nb, 1, 32, .base = 1);
nir_phi_instr_add_src(phi, nir_def_block(cont), cont);
nir_phi_instr_add_src(phi, nir_loop_first_continue_block(loop), cont);
nir_jump(nb, nir_jump_continue);
}
nir_pop_if(nb, NULL);
//>> BB6
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: break, merge, */
//! s1: %phi = p_linear_phi %cont, s1: undef
//>> BB8
//! /* logical preds: / linear preds: BB6, / kind: uniform, continue, */
nir_jump(nb, nir_jump_break);
}
nir_pop_loop(nb, NULL);
//>> BB7
//! /* logical preds: BB2, / linear preds: BB3, / kind: uniform, top-level, loop-exit, */
nir_pop_loop(nb, loop);
nb->cursor = nir_after_phis(nir_loop_first_block(loop));
nir_builder_instr_insert(nb, &phi->instr);
@ -1191,15 +1243,35 @@ END_TEST
* if (divergent) {
* continue
* }
* unit_test 3
* //potentially empty
* }
* unit_test 4
* }
*
* after nir_lower_continue_constructs() and sanitize_if():
*
* loop {
* if (divergent) {
* if (divergent) {
* cont = true
* } else {
* unit_test 3
* //potentially empty
* }
* }
* if (cont) {
* } else {
* unit_test 4
* }
* }
*/
BEGIN_TEST(isel.cf.empty_exec.loop_continue)
if (!setup_nir_cs(GFX11))
return;
nir_push_loop(nb);
nir_loop *loop = nir_push_loop(nb);
nir_loop_add_continue_construct(loop);
{
nir_break_if(nb, nir_imm_false(nb));
@ -1214,28 +1286,39 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue)
nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 2));
{
//>> BB5
//>> /* logical preds: BB4, / linear preds: BB4, / kind: continue, */
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, */
//>> s2: %_ = p_parallelcopy -1
//>> s2: %cont1 = p_parallelcopy %0:exec
nir_jump(nb, nir_jump_continue);
}
nir_pop_if(nb, NULL);
//>> BB9
//! /* logical preds: BB4, / linear preds: BB7, BB8, / kind: uniform, merge, */
//>> p_cbranch_z %0:exec rarely_taken
//>> BB10
//>> BB8
//! /* logical preds: BB4, / linear preds: BB7, / kind: uniform, */
//>> p_unit_test 3, %_
//>> BB10
//! /* logical preds: BB5, BB8, / linear preds: BB8, BB9, / kind: uniform, merge, */
//! s2: %cont2 = p_linear_phi %cont1, %cont1
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 3);
}
nir_pop_if(nb, NULL);
//>> BB17
//! /* logical preds: BB12, BB15, / linear preds: BB15, BB16, / kind: uniform, continue, merge, */
//! p_logical_start
//! p_unit_test 4, %_
//>> BB12
//! /* logical preds: / linear preds: BB10, BB11, / kind: invert, */
//! s2: %tmp = p_linear_phi %cont2, s2: undef
//! s2: %cont3, s1: %16:scc = s_and_b64 %tmp, %0:exec
//>> BB15
//! /* logical preds: BB10, BB13, / linear preds: BB13, BB14, / kind: branch, merge, */
//! s2: %cont = p_linear_phi %cont3, %cont3
//>> p_cbranch_z %cont
//>> BB19
//! /* logical preds: BB15, / linear preds: BB18, / kind: uniform, */
//>> p_unit_test 4, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4);
//>> BB21
//! /* logical preds: BB16, BB19, / linear preds: BB19, BB20, / kind: uniform, continue, merge, */
}
nir_pop_loop(nb, NULL);
//>> BB18
nir_pop_loop(nb, loop);
//>> BB22
//! /* logical preds: BB2, / linear preds: BB2, / kind: uniform, top-level, loop-exit, */
//! p_logical_start
@ -1255,15 +1338,28 @@ END_TEST
* }
* //potentially empty
* }
*
* after nir_lower_continue_constructs() and sanitize_if():
*
* loop {
* if (divergent) {
* } else {
* if (divergent) {
* break
* }
* //potentially empty
* }
* }
*/
BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break)
if (!setup_nir_cs(GFX11))
return;
nir_push_loop(nb);
nir_loop *loop = nir_push_loop(nb);
nir_loop_add_continue_construct(loop);
{
//>> BB1
//! /* logical preds: BB0, BB2, BB14, / linear preds: BB0, BB3, BB14, / kind: loop-header, branch, */
//! /* logical preds: BB0, BB15, / linear preds: BB0, BB15, / kind: loop-header, branch, */
//>> p_unit_test 0, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 0);
@ -1271,35 +1367,37 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break)
nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 1));
{
//>> BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: continue, */
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */
nir_jump(nb, nir_jump_continue);
}
nir_pop_if(nb, NULL);
//>> BB6
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: branch, merge, */
//>> BB4
//! /* logical preds: / linear preds: BB2, BB3, / kind: invert, */
//>> BB5
//! /* logical preds: BB1, / linear preds: BB4, / kind: branch, */
//>> p_unit_test 2, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 2);
//>> s2: %_ = p_unit_test 3
//>> BB7
//! /* logical preds: BB6, / linear preds: BB6, / kind: break, */
//>> BB6
//! /* logical preds: BB5, / linear preds: BB5, / kind: break, */
nir_break_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 3));
//>> BB11
//! /* logical preds: BB6, / linear preds: BB9, BB10, / kind: uniform, merge, */
//>> BB10
//! /* logical preds: BB5, / linear preds: BB8, BB9, / kind: uniform, merge, */
//>> p_cbranch_z %0:exec rarely_taken
//>> BB12
//! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */
//>> BB11
//! /* logical preds: BB10, / linear preds: BB10, / kind: uniform, */
//>> p_unit_test 4, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4);
//>> BB14
//! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */
//>> BB15
//! /* logical preds: BB2, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */
}
nir_pop_loop(nb, NULL);
//>> BB15
//! /* logical preds: BB7, / linear preds: BB8, / kind: uniform, top-level, loop-exit, */
nir_pop_loop(nb, loop);
//>> BB16
//! /* logical preds: BB6, / linear preds: BB7, / kind: uniform, top-level, loop-exit, */
//! p_logical_start
//! p_unit_test 5, %_

View file

@ -708,6 +708,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
ralloc_free(query_ht);
if (progress) {
NIR_PASS(_, shader, nir_lower_continue_constructs);
NIR_PASS(_, shader, nir_split_struct_vars, nir_var_shader_temp);
NIR_PASS(_, shader, nir_lower_global_vars_to_local);
NIR_PASS(_, shader, nir_lower_vars_to_ssa);

View file

@ -857,8 +857,11 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags);
nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
nir_push_loop(b);
nir_loop *loop = nir_push_loop(b);
{
if (!args->use_bvh_stack_rtn)
nir_loop_add_continue_construct(loop);
/* When exiting instances via stack, current_node won't ever be invalid with ds_bvh_stack_rtn */
if (args->use_bvh_stack_rtn) {
/* Early-exit when the stack is empty and there are no more nodes to process. */
@ -1154,8 +1157,11 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags);
nir_push_loop(b);
nir_loop *loop = nir_push_loop(b);
{
if (!args->use_bvh_stack_rtn)
nir_loop_add_continue_construct(loop);
/* When exiting instances via stack, current_node won't ever be invalid with ds_bvh_stack_rtn */
if (args->use_bvh_stack_rtn) {
/* Early-exit when the stack is empty and there are no more nodes to process. */

View file

@ -1276,6 +1276,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
radv_build_end_trace_token(b, &data, nir_load_var(b, iteration_instance_count));
nir_progress(true, b->impl, nir_metadata_none);
nir_lower_continue_constructs(b->shader);
radv_nir_lower_hit_attrib_derefs(b->shader);
return data.trav_vars.result;

View file

@ -1754,8 +1754,10 @@ dgc_alloc_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *se
nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "idx");
nir_store_var(b, idx, nir_imm_int(b, 0), 0x1);
nir_push_loop(b);
nir_loop *loop = nir_push_loop(b);
{
nir_loop_add_continue_construct(loop);
nir_def *cur_idx = nir_load_var(b, idx);
nir_break_if(b, nir_ieq(b, cur_idx, load_param8(b, push_constant_size)));
@ -1777,7 +1779,7 @@ dgc_alloc_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *se
nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1);
}
nir_pop_loop(b, NULL);
nir_pop_loop(b, loop);
/* Store push constants set by DGC tokens. */
u_foreach_bit64 (i, layout->push_constant_mask) {
@ -2025,8 +2027,10 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr)
nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1);
nir_push_loop(b);
nir_loop *loop = nir_push_loop(b);
{
nir_loop_add_continue_construct(loop);
nir_def *cur_idx = nir_load_var(b, vbo_idx);
nir_break_if(b, nir_uge_imm(b, cur_idx, 32 /* bits in vb_desc_usage_mask */));
@ -2097,7 +2101,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr)
nir_store_var(b, vbo_idx, nir_iadd_imm(b, cur_idx, 1), 0x1);
}
nir_pop_loop(b, NULL);
nir_pop_loop(b, loop);
}
/**
@ -2959,6 +2963,8 @@ build_dgc_prepare_shader(struct radv_device *dev, struct radv_indirect_command_l
}
nir_pop_if(&b, NULL);
nir_lower_continue_constructs(b.shader);
return b.shader;
}

View file

@ -789,11 +789,12 @@ nir_visitor::visit(ir_function_signature *ir)
void
nir_visitor::visit(ir_loop *ir)
{
nir_push_loop(&b);
nir_loop *loop = nir_push_loop(&b);
nir_loop_add_continue_construct(loop);
visit_exec_list(&ir->body_instructions, this);
nir_push_continue(&b, NULL);
nir_push_continue(&b, loop);
visit_exec_list(&ir->continue_instructions, this);
nir_pop_loop(&b, NULL);
nir_pop_loop(&b, loop);
}
void

View file

@ -3825,18 +3825,6 @@ nir_loop_last_continue_block(nir_loop *loop)
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
}
/**
* Return the target block of a nir_jump_continue statement
*/
static inline nir_block *
nir_loop_continue_target(nir_loop *loop)
{
if (nir_loop_has_continue_construct(loop))
return nir_loop_first_continue_block(loop);
else
return nir_loop_first_block(loop);
}
/**
* Return true if this list of cf_nodes contains a single empty block.
*/

View file

@ -568,8 +568,6 @@ nir_push_continue(nir_builder *build, nir_loop *loop)
loop = nir_cf_node_as_loop(block->cf_node.parent);
}
nir_loop_add_continue_construct(loop);
build->cursor = nir_before_cf_list(&loop->continue_list);
return loop;
}

View file

@ -278,10 +278,9 @@ block_add_normal_succs(nir_block *block)
nir_loop *loop = nir_cf_node_as_loop(parent);
nir_block *cont_block;
if (block == nir_loop_last_block(loop)) {
cont_block = nir_loop_continue_target(loop);
if (block == nir_loop_last_block(loop) && nir_loop_has_continue_construct(loop)) {
cont_block = nir_loop_first_continue_block(loop);
} else {
assert(block == nir_loop_last_continue_block(loop));
cont_block = nir_loop_first_block(loop);
}
@ -438,6 +437,7 @@ nir_loop_add_continue_construct(nir_loop *loop)
/* change predecessors and successors */
nir_block *header = nir_loop_first_block(loop);
nir_block *preheader = nir_block_cf_tree_prev(header);
assert(header->predecessors.entries <= 2);
set_foreach(&header->predecessors, entry) {
nir_block *pred = (nir_block *)entry->key;
if (pred != preheader)
@ -455,6 +455,7 @@ nir_loop_remove_continue_construct(nir_loop *loop)
/* change predecessors and successors */
nir_block *header = nir_loop_first_block(loop);
nir_block *cont = nir_loop_first_continue_block(loop);
assert(cont->predecessors.entries <= 2);
set_foreach(&cont->predecessors, entry) {
nir_block *pred = (nir_block *)entry->key;
replace_successor(pred, cont, header);
@ -513,7 +514,7 @@ nir_handle_add_jump(nir_block *block)
case nir_jump_continue: {
nir_loop *loop = nearest_loop(&block->cf_node);
nir_block *cont_block = nir_loop_continue_target(loop);
nir_block *cont_block = nir_loop_first_continue_block(loop);
link_blocks(block, cont_block, NULL);
break;
}

View file

@ -26,75 +26,216 @@
#include "nir_builder.h"
#include "nir_control_flow.h"
/* NIR pass to lower loop continue constructs.
*
* NIR loops are maintained in canonical form with these properties:
* - a pre-header: the only predecessor of the loop header
* - a dedicated exit node: dominated by the loop-header
* - a single back-edge to the loop header: the trivial continue
*
* If the loop has a continue construct, the trivial continue is the
* back-edge from the last block of the continue construct to the loop
* header. Otherwise, it is the back-edge from the last block of the
* loop body to the loop header.
*
* In order to lower the continue construct of a loop, all continue
* statements are removed by either
* - moving the code that follows them to the other side of a branch, or
* - guarding the code that follows them with inserted IF-statements.
*
* Afterwards, the continue construct is inlined before the trivial
* back-edge.
*
*/
/* State shared by the helpers that remove the continue statements of one loop. */
struct loop_simplify_state {
/* Builder used to insert the flag stores and the guarding IF statements. */
nir_builder *b;
/* Boolean register: stored 'false' at the top of the loop header, stored
 * 'true' wherever a continue statement was removed, and loaded to predicate
 * the code that followed such a statement.
 */
nir_def *continue_flag;
/* CF list currently being walked by lower_continues_in_cf_list(); code that
 * follows an IF is extracted up to the end of this list.
 */
struct exec_list *cf_list;
};
static bool
lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa)
block_ends_in_continue(nir_block *block)
{
if (nir_block_ends_in_jump(block)) {
nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block));
return jump->type == nir_jump_continue;
}
return false;
}
/* Forward declaration: lower_continues_in_if() and
 * lower_continues_in_cf_list() call each other recursively.
 */
static bool
lower_continues_in_cf_list(struct exec_list *cf_list,
struct loop_simplify_state *state);
/*
 * If the block ends in a continue statement, replace that jump by a store of
 * 'true' to the continue flag.
 *
 * Returns true if a continue statement was removed, false otherwise.
 */
static bool
lower_continue(nir_block *block, struct loop_simplify_state *state)
{
   if (block_ends_in_continue(block)) {
      /* The block holding the jump is expected to be the last node of its
       * CF list.
       */
      assert(nir_cf_node_is_last(&block->cf_node));

      nir_builder *b = state->b;
      nir_instr *jump = nir_block_last_instr(block);

      /* Drop the jump and, at the position it occupied, record that the
       * continue was taken.
       */
      b->cursor = nir_instr_remove(jump);
      nir_def *taken = nir_imm_true(b);
      nir_store_reg(b, taken, state->continue_flag);
      return true;
   }

   return false;
}
/*
 * Remove the continue statements found in both legs of an IF statement and
 * make sure that the code following the IF is not executed once a continue
 * has been taken.
 *
 * Two strategies are used for the code that follows the IF inside the
 * current CF list (state->cf_list):
 *  - if a leg ends in a jump, the following code is moved to the end of the
 *    other leg (or deleted if both legs end in a jump);
 *  - otherwise, if a continue was lowered somewhere inside a leg, the
 *    following code is wrapped in the else-branch of a new IF on the
 *    continue flag.
 *
 * Returns true if any continue statement was lowered.
 */
static bool
lower_continues_in_if(nir_if *nif, struct loop_simplify_state *state)
{
nir_block *then_block = nir_if_last_then_block(nif);
nir_block *else_block = nir_if_last_else_block(nif);
/* Sample the jump state before lower_continue() removes any continue:
 * these flags also cover legs that end in a jump other than continue.
 */
bool then_jumps = nir_block_ends_in_jump(then_block);
bool else_jumps = nir_block_ends_in_jump(else_block);
bool progress = false;
/* Replace a trailing continue in either leg by a store to the flag. */
progress |= lower_continue(then_block, state);
progress |= lower_continue(else_block, state);
/* The block after the IF counts as empty if it is the last node of its CF
 * list and contains no instructions, i.e. nothing follows the IF.
 */
nir_block *next_block = nir_cf_node_cf_tree_next(&nif->cf_node);
bool is_empty_block = nir_cf_node_is_last(&next_block->cf_node) &&
exec_list_is_empty(&next_block->instr_list);
/* If a branch leg ends in a jump, lower the continue statements nested in
 * that leg right away, so that we know whether the following blocks have
 * to be moved to the other side.
 */
if (then_jumps)
progress |= lower_continues_in_cf_list(&nif->then_list, state);
if (else_jumps)
progress |= lower_continues_in_cf_list(&nif->else_list, state);
/* Up to this point, progress can only have been set by a leg that ended in
 * a jump, so at least one of then_jumps/else_jumps is true in this branch.
 */
if (!is_empty_block && progress) {
/* If at least one side has a continue statement, move the following code
* to the other side. This is necessary to maintain SSA dominance.
*/
nir_cf_list list;
nir_cf_extract(&list, nir_after_cf_node_and_phis(&nif->cf_node),
nir_after_cf_list(state->cf_list));
if (then_jumps && else_jumps) {
/* Both branches jump, just delete instructions following the IF. */
nir_cf_delete(&list);
} else if (then_jumps) {
/* Only the then-leg jumps: the following code goes to the else-leg. */
nir_cf_reinsert(&list, nir_after_cf_list(&nif->else_list));
} else {
/* Only the else-leg jumps: the following code goes to the then-leg. */
nir_cf_reinsert(&list, nir_after_cf_list(&nif->then_list));
}
/* The successor is now empty. No need to predicate following blocks. */
is_empty_block = true;
}
/* Recursively lower any continue statements in the legs that were not
 * already handled above.
 */
if (!then_jumps)
progress |= lower_continues_in_cf_list(&nif->then_list, state);
if (!else_jumps)
progress |= lower_continues_in_cf_list(&nif->else_list, state);
if (!is_empty_block && progress) {
/* Predicate following blocks: wrap everything after the IF, up to the end
 * of the current CF list, in the else-branch of a new IF on the continue
 * flag, so that it only runs when the flag is false.
 */
nir_cf_list list;
nir_cf_extract(&list, nir_after_cf_node_and_phis(&nif->cf_node),
nir_after_cf_list(state->cf_list));
state->b->cursor = nir_after_cf_node_and_phis(&nif->cf_node);
nir_if *if_stmt = nir_push_if(state->b, nir_load_reg(state->b, state->continue_flag));
assert(!exec_list_is_empty(&list.list));
nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list));
nir_pop_if(state->b, NULL);
}
return progress;
}
static bool
lower_continues_in_cf_list(struct exec_list *cf_list,
struct loop_simplify_state *state)
{
bool progress = false;
struct exec_list *parent_list = state->cf_list;
state->cf_list = cf_list;
/* We iterate over the list backwards because any given lower call may
* take everything following the given CF node and predicate it. In
* order to avoid recursion/iteration problems, we want everything after
* a given node to already be lowered before this happens.
*/
foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) {
switch (node->type) {
case nir_cf_node_if:
if (lower_continues_in_if(nir_cf_node_as_if(node), state))
progress = true;
break;
case nir_cf_node_block:
case nir_cf_node_loop:
break;
default:
UNREACHABLE("Invalid inner CF node type");
}
}
state->cf_list = parent_list;
return progress;
}
/*
 * Remove the continue statements from the body of a loop that has a continue
 * construct. The caller relies on this so that the loop ends up with at most
 * one back-edge.
 *
 * A continue at the very end of the body is simply dropped. If other
 * continues remain, a boolean register is declared, reset to 'false' at the
 * top of the loop header, and the remaining continue statements in the body
 * are lowered using that register.
 */
static void
simplify_loop(nir_loop *loop)
{
   nir_block *cont = nir_loop_first_continue_block(loop);
   nir_block *last = nir_loop_last_block(loop);

   /* Remove trivial continue statement. */
   if (block_ends_in_continue(last))
      nir_instr_remove_v(nir_block_last_instr(last));

   /* If the loop has only the trivial continue, there is nothing to do. */
   if (!nir_block_ends_in_jump(last) && cont->predecessors.entries == 1)
      return;

   nir_builder b = nir_builder_at(nir_before_block_after_phis(nir_loop_first_block(loop)));

   /* lower_continues_in_cf_list() reads cf_list in order to save and restore
    * it, so every member needs a defined value before the first call.
    */
   struct loop_simplify_state state = {
      .b = &b,
      .continue_flag = NULL,
      .cf_list = NULL,
   };

   /* Initialize the variable to False. */
   state.continue_flag = nir_decl_reg(&b, 1, 1, 0);
   nir_store_reg(&b, nir_imm_false(&b), state.continue_flag);

   lower_continues_in_cf_list(&loop->body, &state);
}
static bool
lower_loop_continue_block(nir_builder *b, nir_loop *loop)
{
if (!nir_loop_has_continue_construct(loop))
return false;
nir_block *header = nir_loop_first_block(loop);
nir_block *cont = nir_loop_first_continue_block(loop);
/* Lower loop header and continue-phis to regs as we are going to move the predecessors. */
nir_lower_phis_to_regs_block(nir_loop_first_block(loop), true);
nir_lower_phis_to_regs_block(nir_loop_first_continue_block(loop), true);
/* count continue statements excluding unreachable ones */
unsigned num_continue = 0;
nir_block *single_predecessor = NULL;
set_foreach(&cont->predecessors, entry) {
nir_block *pred = (nir_block *)entry->key;
/* If the continue block has no predecessors, it is unreachable. */
if (pred->predecessors.entries == 0)
continue;
/* Simplify the loop in order to ensure that it has at most one back-edge. */
simplify_loop(loop);
single_predecessor = pred;
if (num_continue++)
break;
}
nir_cf_list extracted;
nir_cf_list_extract(&extracted, &loop->continue_list);
nir_lower_phis_to_regs_block(header, false);
if (num_continue == 0) {
/* this loop doesn't continue at all. delete the continue construct */
nir_cf_list extracted;
nir_cf_list_extract(&extracted, &loop->continue_list);
if (nir_loop_first_continue_block(loop)->predecessors.entries == 0) {
/* This loop doesn't continue at all. Delete the continue construct. */
nir_cf_delete(&extracted);
} else if (num_continue == 1) {
/* inline the continue construct */
assert(single_predecessor->successors[0] == cont);
assert(single_predecessor->successors[1] == NULL);
nir_cf_list extracted;
nir_cf_list_extract(&extracted, &loop->continue_list);
nir_cf_reinsert(&extracted,
nir_after_block_before_jump(single_predecessor));
} else {
nir_lower_phis_to_regs_block(cont, false);
*repair_ssa = true;
/* As control flow has to re-converge before executing the continue
* construct, we insert it at the beginning of the loop with a flag
* to ensure that it doesn't get executed in the first iteration:
*
* loop {
* if (i != 0) {
* continue construct
* }
* loop body
* }
*/
nir_variable *do_cont =
nir_local_variable_create(b->impl, glsl_bool_type(), "cont");
b->cursor = nir_before_cf_node(&loop->cf_node);
nir_store_var(b, do_cont, nir_imm_false(b), 1);
b->cursor = nir_before_block(header);
nir_if *cont_if = nir_push_if(b, nir_load_var(b, do_cont));
{
nir_cf_list extracted;
nir_cf_list_extract(&extracted, &loop->continue_list);
nir_cf_reinsert(&extracted, nir_before_cf_list(&cont_if->then_list));
}
nir_pop_if(b, cont_if);
nir_store_var(b, do_cont, nir_imm_true(b), 1);
/* Inline the continue construct before the trivial continue. */
nir_cf_reinsert(&extracted, nir_after_cf_list(&loop->body));
}
nir_loop_remove_continue_construct(loop);
@ -102,7 +243,7 @@ lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa)
}
static bool
visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa)
visit_cf_list(nir_builder *b, struct exec_list *list)
{
bool progress = false;
@ -112,15 +253,24 @@ visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa)
continue;
case nir_cf_node_if: {
nir_if *nif = nir_cf_node_as_if(node);
progress |= visit_cf_list(b, &nif->then_list, repair_ssa);
progress |= visit_cf_list(b, &nif->else_list, repair_ssa);
progress |= visit_cf_list(b, &nif->then_list);
progress |= visit_cf_list(b, &nif->else_list);
break;
}
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(node);
progress |= visit_cf_list(b, &loop->body, repair_ssa);
progress |= visit_cf_list(b, &loop->continue_list, repair_ssa);
progress |= lower_loop_continue_block(b, loop, repair_ssa);
/* By first lowering inner loops, we ensure that we don't encounter
* any continue statements which don't belong to the current loop.
*/
progress |= visit_cf_list(b, &loop->body);
/* If we lower continue constructs after inlining functions, they
* might contain nested loops.
*/
progress |= visit_cf_list(b, &loop->continue_list);
/* Lower continue construct. */
progress |= lower_loop_continue_block(b, loop);
break;
}
case nir_cf_node_function:
@ -135,21 +285,13 @@ static bool
lower_continue_constructs_impl(nir_function_impl *impl)
{
nir_builder b = nir_builder_create(impl);
bool repair_ssa = false;
bool progress = visit_cf_list(&b, &impl->body, &repair_ssa);
bool progress = visit_cf_list(&b, &impl->body);
if (progress) {
nir_progress(true, impl, nir_metadata_none);
/* Merge the Phis from Header and Continue Target */
nir_lower_reg_intrinsics_to_ssa_impl(impl);
/* Re-inserting the Continue Target at the beginning of the loop
* violates the dominance property if instructions in the continue
* use SSA defs from the loop body.
*/
if (repair_ssa)
nir_repair_ssa_impl(impl);
} else {
nir_no_progress(impl);
}

View file

@ -346,7 +346,9 @@ loop_routing_start(struct routes *routing, nir_builder *b,
routing->brk.fork = fork;
routing->brk.reachable = fork_reachable(fork);
}
nir_push_loop(b);
nir_loop *loop = nir_push_loop(b);
nir_loop_add_continue_construct(loop);
}
/**
@ -978,5 +980,8 @@ nir_lower_goto_ifs(nir_shader *shader)
progress = true;
}
if (progress)
nir_lower_continue_constructs(shader);
return progress;
}

View file

@ -1251,8 +1251,11 @@ validate_jump_instr(nir_jump_instr *instr, validate_state *state)
validate_assert(state, state->impl->structured);
validate_assert(state, state->loop != NULL);
if (state->loop) {
nir_block *cont_block = nir_loop_continue_target(state->loop);
validate_assert(state, block->successors[0] == cont_block);
validate_assert(state, nir_loop_has_continue_construct(state->loop));
if (nir_loop_has_continue_construct(state->loop)) {
nir_block *cont_block = nir_loop_first_continue_block(state->loop);
validate_assert(state, block->successors[0] == cont_block);
}
}
validate_assert(state, block->successors[1] == NULL);
validate_assert(state, instr->target == NULL);
@ -1503,14 +1506,13 @@ validate_block(nir_block *block, validate_state *state)
if (next == NULL) {
switch (state->parent_node->type) {
case nir_cf_node_loop: {
if (block == nir_loop_last_block(state->loop)) {
nir_block *cont = nir_loop_continue_target(state->loop);
validate_assert(state, block->successors[0] == cont);
if (!nir_loop_has_continue_construct(state->loop) ||
block == nir_loop_last_continue_block(state->loop)) {
nir_block *header = nir_loop_first_block(state->loop);
validate_assert(state, block->successors[0] == header);
} else {
validate_assert(state, nir_loop_has_continue_construct(state->loop) &&
block == nir_loop_last_continue_block(state->loop));
nir_block *head = nir_loop_first_block(state->loop);
validate_assert(state, block->successors[0] == head);
nir_block *cont = nir_loop_first_continue_block(state->loop);
validate_assert(state, block->successors[0] == cont);
}
/* due to the hack for infinite loops, block->successors[1] may
* point to the block after the loop.
@ -1617,6 +1619,7 @@ validate_loop(nir_loop *loop, validate_state *state)
validate_assert(state, next_node->type == nir_cf_node_block);
validate_assert(state, !exec_list_is_empty(&loop->body));
validate_assert(state, nir_loop_first_block(loop)->predecessors.entries <= 2);
nir_cf_node *old_parent = state->parent_node;
state->parent_node = &loop->cf_node;

View file

@ -565,19 +565,20 @@ TEST_F(nir_opt_loop_test, opt_loop_merge_terminators_skip_merge_if_phis_nested_l
)"));
}
TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_ends_with_jump)
TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_no_work)
{
nir_variable *var = nir_variable_create(b->shader, nir_var_shader_temp,
glsl_int_type(), "dummy_var");
nir_loop *loop = nir_push_loop(b);
/* do_work1() */
nir_store_var(b, var, nir_imm_int(b, 0), 1);
/* the break we want to move down: */
nir_break_if(b, nir_imm_true(b));
/* do_work_2: */
nir_push_if(b, nir_imm_true(b));
nir_jump(b, nir_jump_continue);
nir_pop_if(b, NULL);
nir_jump(b, nir_jump_return);
/* No work afterwards. */
nir_pop_loop(b, loop);
ASSERT_FALSE(nir_opt_loop(b->shader));
@ -593,40 +594,32 @@ TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_ends_with_jump)
decl_var shader_out INTERP_MODE_NONE none int out (FRAG_RESULT_DEPTH.x, 0, 0)
decl_var ubo INTERP_MODE_NONE none int ubo1 (0, 0, 0)
decl_var ubo INTERP_MODE_NONE none int[4] ubo_array (0, 0, 0)
decl_var INTERP_MODE_NONE none int dummy_var
decl_function main () (entrypoint)
impl main {
block b0: // preds:
32 %0 = deref_var &in (shader_in int)
32 %1 = @load_deref (%0) (access=none)
// succs: b1
// succs: b1
loop {
block b1: // preds: b0 b5
1 %2 = load_const (true)
// succs: b2 b3
if %2 (true) {
block b1: // preds: b0 b4
32 %2 = load_const (0x00000000)
32 %3 = deref_var &dummy_var (shader_temp int)
@store_deref (%3, %2 (0x0)) (wrmask=x, access=none)
1 %4 = load_const (true)
// succs: b2 b3
if %4 (true) {
block b2:// preds: b1
break
// succs: b8
// succs: b5
} else {
block b3: // preds: b1, succs: b4
}
block b4: // preds: b3
1 %3 = load_const (true)
// succs: b5 b6
if %3 (true) {
block b5:// preds: b4
continue
// succs: b1
} else {
block b6: // preds: b4, succs: b7
}
block b7:// preds: b6
return
// succs: b9
block b4: // preds: b3, succs: b1
}
block b8: // preds: b2, succs: b9
block b9:
block b5: // preds: b2, succs: b6
block b6:
}
)"));
}

View file

@ -1676,6 +1676,9 @@ vtn_emit_cf_func_structured(struct vtn_builder *b, struct vtn_function *func,
next->nloop = nir_push_loop(&b->nb);
nir_store_var(&b->nb, next->continue_var, nir_imm_false(&b->nb), 1);
if (!vtn_is_single_block_loop(next))
nir_loop_add_continue_construct(next->nloop);
next->nloop->control = vtn_loop_control(b, block->merge[3]);
break;

View file

@ -10,6 +10,7 @@
#include "compiler/spirv/spirv.h"
#include "nir_builder.h"
#include "nir_control_flow.h"
#include "nir_deref.h"
enum rq_intersection_var_index {
@ -560,8 +561,10 @@ build_ray_traversal(nir_builder *b, nir_deref_instr *rq,
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
nir_push_loop(b);
nir_loop *loop = nir_push_loop(b);
{
nir_loop_add_continue_construct(loop);
/* Go up the stack if current_node == VK_BVH_INVALID_NODE */
nir_push_if(b, nir_ieq_imm(b, rq_load(b, rq, current_node), VK_BVH_INVALID_NODE));
{
@ -928,7 +931,7 @@ build_ray_traversal(nir_builder *b, nir_deref_instr *rq,
}
nir_pop_if(b, NULL);
}
nir_pop_loop(b, NULL);
nir_pop_loop(b, loop);
return nir_load_var(b, incomplete);
}
@ -1035,6 +1038,9 @@ tu_nir_lower_ray_queries(nir_shader *shader)
ralloc_free(query_ht);
if (progress)
nir_lower_continue_constructs(shader);
return progress;
}

View file

@ -1995,7 +1995,7 @@ ttn_emit_instruction(struct ttn_compile *c)
break;
case TGSI_OPCODE_BGNLOOP:
nir_push_loop(&c->build);
nir_loop_add_continue_construct(nir_push_loop(&c->build));
break;
case TGSI_OPCODE_BRK:
@ -2537,6 +2537,7 @@ ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen)
MESA_TRACE_FUNC();
NIR_PASS(_, nir, nir_lower_continue_constructs);
NIR_PASS(_, nir, nir_lower_returns);
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
NIR_PASS(_, nir, nir_lower_reg_intrinsics_to_ssa);

View file

@ -1083,6 +1083,7 @@ lvp_compile_ray_tracing_pipeline(struct lvp_pipeline *pipeline,
nir_shader_instructions_pass(b->shader, lvp_lower_ray_tracing_instr, nir_metadata_none, &compiler);
NIR_PASS(_, b->shader, nir_lower_continue_constructs);
NIR_PASS(_, b->shader, nir_lower_returns);
const struct nir_lower_compute_system_values_options compute_system_values = {0};

View file

@ -655,6 +655,7 @@ lvp_nir_lower_ray_queries(struct nir_shader *shader)
ralloc_free(query_ht);
if (progress) {
NIR_PASS(_, shader, nir_lower_continue_constructs);
NIR_PASS(_, shader, nir_lower_global_vars_to_local);
NIR_PASS(_, shader, nir_lower_vars_to_ssa);

View file

@ -497,8 +497,10 @@ lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *arg
.no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
};
nir_push_loop(b);
nir_loop *loop = nir_push_loop(b);
{
nir_loop_add_continue_construct(loop);
nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), LVP_BVH_INVALID_NODE));
{
nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.stack_ptr), 0));
@ -607,7 +609,7 @@ lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *arg
}
nir_pop_if(b, NULL);
}
nir_pop_loop(b, NULL);
nir_pop_loop(b, loop);
return nir_load_var(b, incomplete);
}

View file

@ -2113,10 +2113,10 @@ lower_subgroup_scan(nir_builder *b, nir_intrinsic_instr *intr, void *data)
nir_pop_if(b, if_active_thread);
nir_store_var(b, loop_counter_var, nir_iadd_imm(b, loop_counter, 1), 1);
nir_jump(b, nir_jump_continue);
nir_pop_if(b, nif);
nir_push_else(b, nif);
nir_jump(b, nir_jump_break);
nir_pop_loop(b, loop);
result = nir_load_var(b, result_var);

View file

@ -374,7 +374,8 @@ dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
* TODO: Might be a good thing to use the CL compiler we have and turn
* those shaders into CL kernels.
*/
nir_push_loop(&b);
nir_loop *loop = nir_push_loop(&b);
nir_loop_add_continue_construct(loop);
old_index_ptr = nir_load_var(&b, old_index_ptr_var);
nir_def *index0 = nir_load_var(&b, index0_var);
@ -445,6 +446,8 @@ dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
new_index_count_ptr_desc, nir_imm_int(&b, 0),
.write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);
nir_lower_continue_constructs(b.shader);
return b.shader;
}