diff --git a/src/amd/common/nir/ac_nir_lower_ngg.c b/src/amd/common/nir/ac_nir_lower_ngg.c index b8738fd93ae..7bcbc28cb45 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg.c +++ b/src/amd/common/nir/ac_nir_lower_ngg.c @@ -1768,6 +1768,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option nir_validate_shader(shader, "after emitting NGG VS/TES"); /* Cleanup */ + if (state.streamout_enabled) + nir_lower_continue_constructs(shader); nir_opt_dead_write_vars(shader); nir_lower_vars_to_ssa(shader); nir_remove_dead_variables(shader, nir_var_function_temp, NULL); diff --git a/src/amd/common/nir/ac_nir_lower_ngg_gs.c b/src/amd/common/nir/ac_nir_lower_ngg_gs.c index 2498eec0da3..0965000ce33 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_gs.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_gs.c @@ -929,6 +929,8 @@ ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options, nir_validate_shader(shader, "after emitting NGG GS"); /* Cleanup */ + if (state.streamout_enabled) + nir_lower_continue_constructs(shader); nir_lower_vars_to_ssa(shader); nir_remove_dead_variables(shader, nir_var_function_temp, NULL); diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index 22a9fb56f48..08b56b025d2 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -1050,6 +1050,8 @@ ac_nir_ngg_build_streamout_buffer_info(nir_builder *b, nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + for (unsigned i = 0; i < NUM_ATOMICS_IN_FLIGHT; i++) { int issue_index = (NUM_ATOMICS_IN_FLIGHT - 1 + i) % NUM_ATOMICS_IN_FLIGHT; int read_index = i; diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index c13819ac2b4..d99f964961a 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -410,6 +410,7 @@ void finish_isel_test(enum ac_hw_stage hw_stage, unsigned wave_size) { 
nir_validate_shader(nb->shader, "in finish_isel_test"); + nir_lower_continue_constructs(nb->shader); program.reset(new Program); program->debug.func = nullptr; diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index eab9baabc28..6b46a058fe3 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -376,6 +376,19 @@ END_TEST * // unreachable block * break; * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (uniform) { + * cont = true; + * } else { + * cont = true; + * } + * if (false) { + * break; + * } + * } */ BEGIN_TEST(isel.cf.unreachable_break.uniform_continue) if (!setup_nir_cs(GFX11)) @@ -388,46 +401,49 @@ BEGIN_TEST(isel.cf.unreachable_break.uniform_continue) //>> s3: %val1 = p_create_vector 0, 0, 0 //>> s1: %val0 = p_parallelcopy 0 - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { //>> BB1 - //! /* logical preds: BB0, BB2, BB5, / linear preds: BB0, BB2, BB5, / kind: uniform, loop-header, */ + //! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: uniform, loop-header, */ nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2)); { //>> BB2 - //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, continue, */ + //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */ nir_jump(nb, nir_jump_continue); } nir_push_else(nb, NULL); { - /* The contents of this branch is moved to the merge block, and a dummy break is inserted - * before the continue so that the loop has an exit. - */ //>> BB3 //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */ //! p_logical_start //! s1: %_ = p_unit_test 5 - //! s2: %zero = p_parallelcopy 0 - //! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec - //! p_logical_end - //! p_cbranch_z %cond:scc - //! BB4 - //! /* logical preds: BB3, / linear preds: BB3, / kind: uniform, break, */ - //>> BB5 - //! 
/* logical preds: BB3, / linear preds: BB3, / kind: uniform, continue, */ nir_unit_test_uniform_input(nb, 1, 32, .base=5); nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - + /* The unreachable break is removed when lowering the continues. However, + * a dummy break is inserted, so that the loop has an exit. + */ + //>> BB4 + //! /* logical preds: BB2, BB3, / linear preds: BB2, BB3, / kind: uniform, */ + //! p_logical_start + //! s2: %zero = p_parallelcopy 0 + //! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec + //! p_logical_end + //! p_cbranch_z %cond:scc + //! BB5 + //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, break, */ + //>> BB6 + //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, continue, */ val0 = nir_imm_zero(nb, 1, 32); val1 = nir_load_local_invocation_id(nb); nir_jump(nb, nir_jump_break); } - nir_pop_loop(nb, NULL); - //>> BB6 - //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, top-level, loop-exit, */ + nir_pop_loop(nb, loop); + //>> BB7 + //! /* logical preds: BB5, / linear preds: BB5, / kind: uniform, top-level, loop-exit, */ //>> p_unit_test 0, %val0 //! p_unit_test 1, %val1 @@ -645,7 +661,8 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { /* A dummy break is inserted before the continue so that the loop has an exit. 
*/ //>> BB1 @@ -661,7 +678,7 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit) nir_unit_test_uniform_input(nb, 1, 32, .base=0); nir_jump(nb, nir_jump_continue); } - nir_pop_loop(nb, NULL); + nir_pop_loop(nb, loop); finish_isel_test(); END_TEST @@ -720,19 +737,40 @@ END_TEST * } * use(val); * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (divergent) { + * } else { + * if (uniform) { + * break; + * } + * val = uniform; + * use(val); + * } + * } */ BEGIN_TEST(isel.cf.uniform_if_branch_use) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { + //>> BB1 + //! /* logical preds: BB0, BB15, / linear preds: BB0, BB15, / kind: loop-header, branch, */ + //>> s2: %_ = p_unit_test 3 nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base=3)); { nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); + //>> BB4 + //! /* logical preds: / linear preds: BB2, BB3, / kind: invert, */ + //>> BB5 + //! /* logical preds: BB1, / linear preds: BB4, / kind: uniform, */ //>> s2: %cond = p_unit_test 2 //! s2: %_, s1: %_:scc = s_and_b64 %cond, %0:exec //! p_logical_end @@ -740,31 +778,31 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use) nir_def *val; nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2)); { - //>> BB7 - //! /* logical preds: BB6, / linear preds: BB6, / kind: break, */ + //>> BB6 + //! /* logical preds: BB5, / linear preds: BB5, / kind: break, */ nir_jump(nb, nir_jump_break); } nir_push_else(nb, NULL); { /* The contents of this branch is moved to the merge block. */ - //>> BB11 - //! /* logical preds: BB10, / linear preds: BB9, BB10, / kind: uniform, */ + //>> BB10 + //! /* logical preds: BB9, / linear preds: BB8, BB9, / kind: uniform, */ //>> p_cbranch_z %0:exec rarely_taken - //! BB12 - //! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */ + //! BB11 + //! /* logical preds: BB10, / linear preds: BB10, / kind: uniform, */ //! 
p_logical_start //! s1: %val = p_unit_test 0 + //! p_unit_test 1, %val val = nir_unit_test_uniform_input(nb, 1, 32, .base=0); } nir_pop_if(nb, NULL); - //! p_unit_test 1, %val nir_unit_test_output(nb, val, .base=1); - //>> BB14 - //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */ + //>> BB15 + //! /* logical preds: BB2, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */ } - nir_pop_loop(nb, NULL); + nir_pop_loop(nb, loop); finish_isel_test(); END_TEST @@ -780,6 +818,17 @@ END_TEST * d = c or undef * break * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * b = ... + * loop { + * a = linear_phi b, c + * if (!divergent) { + * break + * } + * c = ... + * } */ BEGIN_TEST(isel.cf.hidden_continue) if (!setup_nir_cs(GFX11)) @@ -789,35 +838,38 @@ BEGIN_TEST(isel.cf.hidden_continue) nir_def* init = nir_unit_test_uniform_input(nb, 1, 32, .base = 0); nir_phi_instr* phi; - nir_loop* loop = nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { //>> BB1 - //! /* logical preds: BB0, BB2, / linear preds: BB0, BB3, BB8, / kind: loop-header, branch, */ - //! s1: %2 = p_linear_phi %init, %cont, %phi + //! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: loop-header, branch, */ + //! s1: %2 = p_linear_phi %init, %cont phi = nir_phi_instr_create(nb->shader); nir_def_init(&phi->instr, &phi->def, 1, 32); nir_phi_instr_add_src(phi, nir_def_block(init), init); + //>> s2: %cond = p_unit_test 4 + //! s2: %inverse_cond, s1: %_:scc = s_not_b64 %cond + //>> p_cbranch_z %inverse_cond + //>> BB2 + //! /* logical preds: BB1, / linear preds: BB1, / kind: break, */ nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 4)); { - //>> BB2 - //! /* logical preds: BB1, / linear preds: BB1, / kind: continue, */ + //>> BB6 + //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, continue, merge, */ //! p_logical_start //! 
s1: %cont = p_unit_test 1 nir_def* cont = nir_unit_test_uniform_input(nb, 1, 32, .base = 1); - nir_phi_instr_add_src(phi, nir_def_block(cont), cont); + nir_phi_instr_add_src(phi, nir_loop_first_continue_block(loop), cont); nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - //>> BB6 - //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: break, merge, */ - //! s1: %phi = p_linear_phi %cont, s1: undef - //>> BB8 - //! /* logical preds: / linear preds: BB6, / kind: uniform, continue, */ nir_jump(nb, nir_jump_break); } - nir_pop_loop(nb, NULL); + //>> BB7 + //! /* logical preds: BB2, / linear preds: BB3, / kind: uniform, top-level, loop-exit, */ + nir_pop_loop(nb, loop); nb->cursor = nir_after_phis(nir_loop_first_block(loop)); nir_builder_instr_insert(nb, &phi->instr); @@ -1191,15 +1243,35 @@ END_TEST * if (divergent) { * continue * } + * unit_test 3 * //potentially empty * } + * unit_test 4 + * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (divergent) { + * if (divergent) { + * cont = true + * } else { + * unit_test 3 + * //potentially empty + * } + * } + * if (cont) { + * } else { + * unit_test 4 + * } * } */ BEGIN_TEST(isel.cf.empty_exec.loop_continue) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { nir_break_if(nb, nir_imm_false(nb)); @@ -1214,28 +1286,39 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue) nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 2)); { //>> BB5 - //>> /* logical preds: BB4, / linear preds: BB4, / kind: continue, */ + //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, */ + //>> s2: %_ = p_parallelcopy -1 + //>> s2: %cont1 = p_parallelcopy %0:exec nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - //>> BB9 - //! /* logical preds: BB4, / linear preds: BB7, BB8, / kind: uniform, merge, */ - - //>> p_cbranch_z %0:exec rarely_taken - //>> BB10 + //>> BB8 + //! 
/* logical preds: BB4, / linear preds: BB7, / kind: uniform, */ //>> p_unit_test 3, %_ + + //>> BB10 + //! /* logical preds: BB5, BB8, / linear preds: BB8, BB9, / kind: uniform, merge, */ + //! s2: %cont2 = p_linear_phi %cont1, %cont1 nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 3); } nir_pop_if(nb, NULL); - //>> BB17 - //! /* logical preds: BB12, BB15, / linear preds: BB15, BB16, / kind: uniform, continue, merge, */ - //! p_logical_start - - //! p_unit_test 4, %_ + //>> BB12 + //! /* logical preds: / linear preds: BB10, BB11, / kind: invert, */ + //! s2: %tmp = p_linear_phi %cont2, s2: undef + //! s2: %cont3, s1: %16:scc = s_and_b64 %tmp, %0:exec + //>> BB15 + //! /* logical preds: BB10, BB13, / linear preds: BB13, BB14, / kind: branch, merge, */ + //! s2: %cont = p_linear_phi %cont3, %cont3 + //>> p_cbranch_z %cont + //>> BB19 + //! /* logical preds: BB15, / linear preds: BB18, / kind: uniform, */ + //>> p_unit_test 4, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4); + //>> BB21 + //! /* logical preds: BB16, BB19, / linear preds: BB19, BB20, / kind: uniform, continue, merge, */ } - nir_pop_loop(nb, NULL); - //>> BB18 + nir_pop_loop(nb, loop); + //>> BB22 //! /* logical preds: BB2, / linear preds: BB2, / kind: uniform, top-level, loop-exit, */ //! p_logical_start @@ -1255,15 +1338,28 @@ END_TEST * } * //potentially empty * } + * + * after nir_lower_continue_constructs() and sanitize_if(): + * + * loop { + * if (divergent) { + * } else { + * if (divergent) { + * break + * } + * //potentially empty + * } + * } */ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break) if (!setup_nir_cs(GFX11)) return; - nir_push_loop(nb); + nir_loop *loop = nir_push_loop(nb); + nir_loop_add_continue_construct(loop); { //>> BB1 - //! /* logical preds: BB0, BB2, BB14, / linear preds: BB0, BB3, BB14, / kind: loop-header, branch, */ + //! 
/* logical preds: BB0, BB15, / linear preds: BB0, BB15, / kind: loop-header, branch, */ //>> p_unit_test 0, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 0); @@ -1271,35 +1367,37 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break) nir_push_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 1)); { //>> BB2 - //! /* logical preds: BB1, / linear preds: BB1, / kind: continue, */ + //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, */ nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - //>> BB6 - //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: branch, merge, */ + //>> BB4 + //! /* logical preds: / linear preds: BB2, BB3, / kind: invert, */ + //>> BB5 + //! /* logical preds: BB1, / linear preds: BB4, / kind: branch, */ //>> p_unit_test 2, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 2); //>> s2: %_ = p_unit_test 3 - //>> BB7 - //! /* logical preds: BB6, / linear preds: BB6, / kind: break, */ + //>> BB6 + //! /* logical preds: BB5, / linear preds: BB5, / kind: break, */ nir_break_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 3)); - //>> BB11 - //! /* logical preds: BB6, / linear preds: BB9, BB10, / kind: uniform, merge, */ + //>> BB10 + //! /* logical preds: BB5, / linear preds: BB8, BB9, / kind: uniform, merge, */ //>> p_cbranch_z %0:exec rarely_taken - //>> BB12 - //! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */ + //>> BB11 + //! /* logical preds: BB10, / linear preds: BB10, / kind: uniform, */ //>> p_unit_test 4, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4); - //>> BB14 - //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */ + //>> BB15 + //! /* logical preds: BB2, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */ } - nir_pop_loop(nb, NULL); - //>> BB15 - //! /* logical preds: BB7, / linear preds: BB8, / kind: uniform, top-level, loop-exit, */ + nir_pop_loop(nb, loop); + //>> BB16 + //! 
/* logical preds: BB6, / linear preds: BB7, / kind: uniform, top-level, loop-exit, */ //! p_logical_start //! p_unit_test 5, %_ diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index e945866a706..25b2ecf9a32 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -708,6 +708,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device ralloc_free(query_ht); if (progress) { + NIR_PASS(_, shader, nir_lower_continue_constructs); NIR_PASS(_, shader, nir_split_struct_vars, nir_var_shader_temp); NIR_PASS(_, shader, nir_lower_global_vars_to_local); NIR_PASS(_, shader, nir_lower_vars_to_ssa); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index e3646cca81c..18526bdd30a 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -857,8 +857,11 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags); nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + if (!args->use_bvh_stack_rtn) + nir_loop_add_continue_construct(loop); + /* When exiting instances via stack, current_node won't ever be invalid with ds_bvh_stack_rtn */ if (args->use_bvh_stack_rtn) { /* Early-exit when the stack is empty and there are no more nodes to process. 
*/ @@ -1154,8 +1157,11 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + if (!args->use_bvh_stack_rtn) + nir_loop_add_continue_construct(loop); + /* When exiting instances via stack, current_node won't ever be invalid with ds_bvh_stack_rtn */ if (args->use_bvh_stack_rtn) { /* Early-exit when the stack is empty and there are no more nodes to process. */ diff --git a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c index 6bc121d4dc6..29c7d5c438d 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c @@ -1276,6 +1276,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin radv_build_end_trace_token(b, &data, nir_load_var(b, iteration_instance_count)); nir_progress(true, b->impl, nir_metadata_none); + nir_lower_continue_constructs(b->shader); radv_nir_lower_hit_attrib_derefs(b->shader); return data.trav_vars.result; diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index eb22a5f870f..6c5bd4cacfe 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -1754,8 +1754,10 @@ dgc_alloc_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *se nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "idx"); nir_store_var(b, idx, nir_imm_int(b, 0), 0x1); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + nir_def *cur_idx = nir_load_var(b, idx); nir_break_if(b, nir_ieq(b, cur_idx, load_param8(b, push_constant_size))); @@ -1777,7 +1779,7 @@ dgc_alloc_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *se nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); /* Store push constants 
set by DGC tokens. */ u_foreach_bit64 (i, layout->push_constant_mask) { @@ -2025,8 +2027,10 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr) nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx"); nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + nir_def *cur_idx = nir_load_var(b, vbo_idx); nir_break_if(b, nir_uge_imm(b, cur_idx, 32 /* bits in vb_desc_usage_mask */)); @@ -2097,7 +2101,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr) nir_store_var(b, vbo_idx, nir_iadd_imm(b, cur_idx, 1), 0x1); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); } /** @@ -2959,6 +2963,8 @@ build_dgc_prepare_shader(struct radv_device *dev, struct radv_indirect_command_l } nir_pop_if(&b, NULL); + nir_lower_continue_constructs(b.shader); + return b.shader; } diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 306fd66126d..e5cc502d849 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -789,11 +789,12 @@ nir_visitor::visit(ir_function_signature *ir) void nir_visitor::visit(ir_loop *ir) { - nir_push_loop(&b); + nir_loop *loop = nir_push_loop(&b); + nir_loop_add_continue_construct(loop); visit_exec_list(&ir->body_instructions, this); - nir_push_continue(&b, NULL); + nir_push_continue(&b, loop); visit_exec_list(&ir->continue_instructions, this); - nir_pop_loop(&b, NULL); + nir_pop_loop(&b, loop); } void diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7c552657292..0b552e5c4a2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3825,18 +3825,6 @@ nir_loop_last_continue_block(nir_loop *loop) return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); } -/** - * Return the target block of a nir_jump_continue statement - */ -static inline nir_block * 
-nir_loop_continue_target(nir_loop *loop) -{ - if (nir_loop_has_continue_construct(loop)) - return nir_loop_first_continue_block(loop); - else - return nir_loop_first_block(loop); -} - /** * Return true if this list of cf_nodes contains a single empty block. */ diff --git a/src/compiler/nir/nir_builder.c b/src/compiler/nir/nir_builder.c index 3e1154bad61..9f265970f0a 100644 --- a/src/compiler/nir/nir_builder.c +++ b/src/compiler/nir/nir_builder.c @@ -568,8 +568,6 @@ nir_push_continue(nir_builder *build, nir_loop *loop) loop = nir_cf_node_as_loop(block->cf_node.parent); } - nir_loop_add_continue_construct(loop); - build->cursor = nir_before_cf_list(&loop->continue_list); return loop; } diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c index 6156db77f06..ab7f151387b 100644 --- a/src/compiler/nir/nir_control_flow.c +++ b/src/compiler/nir/nir_control_flow.c @@ -278,10 +278,9 @@ block_add_normal_succs(nir_block *block) nir_loop *loop = nir_cf_node_as_loop(parent); nir_block *cont_block; - if (block == nir_loop_last_block(loop)) { - cont_block = nir_loop_continue_target(loop); + if (block == nir_loop_last_block(loop) && nir_loop_has_continue_construct(loop)) { + cont_block = nir_loop_first_continue_block(loop); } else { - assert(block == nir_loop_last_continue_block(loop)); cont_block = nir_loop_first_block(loop); } @@ -438,6 +437,7 @@ nir_loop_add_continue_construct(nir_loop *loop) /* change predecessors and successors */ nir_block *header = nir_loop_first_block(loop); nir_block *preheader = nir_block_cf_tree_prev(header); + assert(header->predecessors.entries <= 2); set_foreach(&header->predecessors, entry) { nir_block *pred = (nir_block *)entry->key; if (pred != preheader) @@ -455,6 +455,7 @@ nir_loop_remove_continue_construct(nir_loop *loop) /* change predecessors and successors */ nir_block *header = nir_loop_first_block(loop); nir_block *cont = nir_loop_first_continue_block(loop); + assert(cont->predecessors.entries <= 2); 
set_foreach(&cont->predecessors, entry) { nir_block *pred = (nir_block *)entry->key; replace_successor(pred, cont, header); @@ -513,7 +514,7 @@ nir_handle_add_jump(nir_block *block) case nir_jump_continue: { nir_loop *loop = nearest_loop(&block->cf_node); - nir_block *cont_block = nir_loop_continue_target(loop); + nir_block *cont_block = nir_loop_first_continue_block(loop); link_blocks(block, cont_block, NULL); break; } diff --git a/src/compiler/nir/nir_lower_continue_constructs.c b/src/compiler/nir/nir_lower_continue_constructs.c index ce0a74ea1ce..da795db28f3 100644 --- a/src/compiler/nir/nir_lower_continue_constructs.c +++ b/src/compiler/nir/nir_lower_continue_constructs.c @@ -26,75 +26,216 @@ #include "nir_builder.h" #include "nir_control_flow.h" +/* NIR pass to lower loop continue constructs. + * + * NIR loops are maintained in canonical form with these properties: + * - a pre-header: the only predecessor of the loop header + * - a dedicated exit node: dominated by the loop-header + * - a single back-edge to the loop header: the trivial continue + * + * If the loop has a continue construct, the trivial continue is the + * back-edge from the last block of the continue construct to the loop + * header. Otherwise, it is the back-edge from the last block of the + * loop body to the loop header. + * + * In order to lower the continue construct of a loop, all continue + * statements are being removed by either + * - moving the following code to the other side of a branch or + * - guarding following code by inserted IF-statements + * + * Afterwards, the continue construct is inlined before the trivial + * back-edge. 
+ * + */ + +struct loop_simplify_state { + nir_builder *b; + nir_def *continue_flag; + struct exec_list *cf_list; +}; + static bool -lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa) +block_ends_in_continue(nir_block *block) +{ + if (nir_block_ends_in_jump(block)) { + nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block)); + return jump->type == nir_jump_continue; + } + + return false; +} + +static bool +lower_continues_in_cf_list(struct exec_list *cf_list, + struct loop_simplify_state *state); + +static bool +lower_continue(nir_block *block, struct loop_simplify_state *state) +{ + if (!block_ends_in_continue(block)) + return false; + + assert(nir_cf_node_is_last(&block->cf_node)); + + /* Remove the continue instruction and set the predicate to 'true'. */ + state->b->cursor = nir_instr_remove(nir_block_last_instr(block)); + nir_store_reg(state->b, nir_imm_true(state->b), state->continue_flag); + + return true; +} + +static bool +lower_continues_in_if(nir_if *nif, struct loop_simplify_state *state) +{ + nir_block *then_block = nir_if_last_then_block(nif); + nir_block *else_block = nir_if_last_else_block(nif); + bool then_jumps = nir_block_ends_in_jump(then_block); + bool else_jumps = nir_block_ends_in_jump(else_block); + + bool progress = false; + progress |= lower_continue(then_block, state); + progress |= lower_continue(else_block, state); + + nir_block *next_block = nir_cf_node_cf_tree_next(&nif->cf_node); + bool is_empty_block = nir_cf_node_is_last(&next_block->cf_node) && + exec_list_is_empty(&next_block->instr_list); + + /* If a branch leg ends in a jump, lower any continue statements in it first, + * so that we know whether the following blocks must move to the other side. 
+ */ + if (then_jumps) + progress |= lower_continues_in_cf_list(&nif->then_list, state); + if (else_jumps) + progress |= lower_continues_in_cf_list(&nif->else_list, state); + + if (!is_empty_block && progress) { + /* If at least one side has a continue statement, move the following code + * to the other side. This is necessary to maintain SSA dominance. + */ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node_and_phis(&nif->cf_node), + nir_after_cf_list(state->cf_list)); + + if (then_jumps && else_jumps) { + /* Both branches jump, just delete instructions following the IF. */ + nir_cf_delete(&list); + } else if (then_jumps) { + nir_cf_reinsert(&list, nir_after_cf_list(&nif->else_list)); + } else { + nir_cf_reinsert(&list, nir_after_cf_list(&nif->then_list)); + } + + /* The successor is now empty. No need to predicate following blocks. */ + is_empty_block = true; + } + + /* Recursively lower any continue statements in both branch legs. */ + if (!then_jumps) + progress |= lower_continues_in_cf_list(&nif->then_list, state); + if (!else_jumps) + progress |= lower_continues_in_cf_list(&nif->else_list, state); + + if (!is_empty_block && progress) { + /* Predicate following blocks. 
*/ + nir_cf_list list; + nir_cf_extract(&list, nir_after_cf_node_and_phis(&nif->cf_node), + nir_after_cf_list(state->cf_list)); + + state->b->cursor = nir_after_cf_node_and_phis(&nif->cf_node); + nir_if *if_stmt = nir_push_if(state->b, nir_load_reg(state->b, state->continue_flag)); + + assert(!exec_list_is_empty(&list.list)); + nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list)); + nir_pop_if(state->b, NULL); + } + + return progress; +} + +static bool +lower_continues_in_cf_list(struct exec_list *cf_list, + struct loop_simplify_state *state) +{ + bool progress = false; + + struct exec_list *parent_list = state->cf_list; + state->cf_list = cf_list; + + /* We iterate over the list backwards because any given lower call may + * take everything following the given CF node and predicate it. In + * order to avoid recursion/iteration problems, we want everything after + * a given node to already be lowered before this happens. + */ + foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_if: + if (lower_continues_in_if(nir_cf_node_as_if(node), state)) + progress = true; + break; + + case nir_cf_node_block: + case nir_cf_node_loop: + break; + + default: + UNREACHABLE("Invalid inner CF node type"); + } + } + + state->cf_list = parent_list; + + return progress; +} + +static void +simplify_loop(nir_loop *loop) +{ + nir_block *cont = nir_loop_first_continue_block(loop); + nir_block *last = nir_loop_last_block(loop); + + /* Remove trivial continue statement. */ + if (block_ends_in_continue(last)) + nir_instr_remove_v(nir_block_last_instr(last)); + + /* If the loop has only the trivial continue, there is nothing to do. */ + if (!nir_block_ends_in_jump(last) && cont->predecessors.entries == 1) + return; + + struct loop_simplify_state state; + nir_builder b = nir_builder_at(nir_before_block_after_phis(nir_loop_first_block(loop))); + state.b = &b; + + /* Initialize the variable to False. 
*/ + state.continue_flag = nir_decl_reg(&b, 1, 1, 0); + nir_store_reg(&b, nir_imm_false(&b), state.continue_flag); + + lower_continues_in_cf_list(&loop->body, &state); + + return; +} + +static bool +lower_loop_continue_block(nir_builder *b, nir_loop *loop) { if (!nir_loop_has_continue_construct(loop)) return false; - nir_block *header = nir_loop_first_block(loop); - nir_block *cont = nir_loop_first_continue_block(loop); + /* Lower loop header and continue-phis to regs as we are going to move the predecessors. */ + nir_lower_phis_to_regs_block(nir_loop_first_block(loop), true); + nir_lower_phis_to_regs_block(nir_loop_first_continue_block(loop), true); - /* count continue statements excluding unreachable ones */ - unsigned num_continue = 0; - nir_block *single_predecessor = NULL; - set_foreach(&cont->predecessors, entry) { - nir_block *pred = (nir_block *)entry->key; - /* If the continue block has no predecessors, it is unreachable. */ - if (pred->predecessors.entries == 0) - continue; + /* Simplify the loop in order to ensure that it has at most one back-edge. */ + simplify_loop(loop); - single_predecessor = pred; - if (num_continue++) - break; - } + nir_cf_list extracted; + nir_cf_list_extract(&extracted, &loop->continue_list); - nir_lower_phis_to_regs_block(header, false); - - if (num_continue == 0) { - /* this loop doesn't continue at all. delete the continue construct */ - nir_cf_list extracted; - nir_cf_list_extract(&extracted, &loop->continue_list); + if (nir_loop_first_continue_block(loop)->predecessors.entries == 0) { + /* This loop doesn't continue at all. Delete the continue construct. 
*/ nir_cf_delete(&extracted); - } else if (num_continue == 1) { - /* inline the continue construct */ - assert(single_predecessor->successors[0] == cont); - assert(single_predecessor->successors[1] == NULL); - - nir_cf_list extracted; - nir_cf_list_extract(&extracted, &loop->continue_list); - nir_cf_reinsert(&extracted, - nir_after_block_before_jump(single_predecessor)); } else { - nir_lower_phis_to_regs_block(cont, false); - *repair_ssa = true; - - /* As control flow has to re-converge before executing the continue - * construct, we insert it at the beginning of the loop with a flag - * to ensure that it doesn't get executed in the first iteration: - * - * loop { - * if (i != 0) { - * continue construct - * } - * loop body - * } - */ - - nir_variable *do_cont = - nir_local_variable_create(b->impl, glsl_bool_type(), "cont"); - - b->cursor = nir_before_cf_node(&loop->cf_node); - nir_store_var(b, do_cont, nir_imm_false(b), 1); - b->cursor = nir_before_block(header); - nir_if *cont_if = nir_push_if(b, nir_load_var(b, do_cont)); - { - nir_cf_list extracted; - nir_cf_list_extract(&extracted, &loop->continue_list); - nir_cf_reinsert(&extracted, nir_before_cf_list(&cont_if->then_list)); - } - nir_pop_if(b, cont_if); - nir_store_var(b, do_cont, nir_imm_true(b), 1); + /* Inline the continue construct before the trivial continue. 
*/ + nir_cf_reinsert(&extracted, nir_after_cf_list(&loop->body)); } nir_loop_remove_continue_construct(loop); @@ -102,7 +243,7 @@ lower_loop_continue_block(nir_builder *b, nir_loop *loop, bool *repair_ssa) } static bool -visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa) +visit_cf_list(nir_builder *b, struct exec_list *list) { bool progress = false; @@ -112,15 +253,24 @@ visit_cf_list(nir_builder *b, struct exec_list *list, bool *repair_ssa) continue; case nir_cf_node_if: { nir_if *nif = nir_cf_node_as_if(node); - progress |= visit_cf_list(b, &nif->then_list, repair_ssa); - progress |= visit_cf_list(b, &nif->else_list, repair_ssa); + progress |= visit_cf_list(b, &nif->then_list); + progress |= visit_cf_list(b, &nif->else_list); break; } case nir_cf_node_loop: { nir_loop *loop = nir_cf_node_as_loop(node); - progress |= visit_cf_list(b, &loop->body, repair_ssa); - progress |= visit_cf_list(b, &loop->continue_list, repair_ssa); - progress |= lower_loop_continue_block(b, loop, repair_ssa); + /* By first lowering inner loops, we ensure that we don't encounter + * any continue statements which don't belong to the current loop. + */ + progress |= visit_cf_list(b, &loop->body); + + /* If we lower continue constructs after inlining functions, they + * might contain nested loops. + */ + progress |= visit_cf_list(b, &loop->continue_list); + + /* Lower continue construct. 
*/ + progress |= lower_loop_continue_block(b, loop); break; } case nir_cf_node_function: @@ -135,21 +285,13 @@ static bool lower_continue_constructs_impl(nir_function_impl *impl) { nir_builder b = nir_builder_create(impl); - bool repair_ssa = false; - bool progress = visit_cf_list(&b, &impl->body, &repair_ssa); + bool progress = visit_cf_list(&b, &impl->body); if (progress) { nir_progress(true, impl, nir_metadata_none); /* Merge the Phis from Header and Continue Target */ nir_lower_reg_intrinsics_to_ssa_impl(impl); - - /* Re-inserting the Continue Target at the beginning of the loop - * violates the dominance property if instructions in the continue - * use SSA defs from the loop body. - */ - if (repair_ssa) - nir_repair_ssa_impl(impl); } else { nir_no_progress(impl); } diff --git a/src/compiler/nir/nir_lower_goto_ifs.c b/src/compiler/nir/nir_lower_goto_ifs.c index c83c8ef78b3..c0b5213b547 100644 --- a/src/compiler/nir/nir_lower_goto_ifs.c +++ b/src/compiler/nir/nir_lower_goto_ifs.c @@ -346,7 +346,9 @@ loop_routing_start(struct routes *routing, nir_builder *b, routing->brk.fork = fork; routing->brk.reachable = fork_reachable(fork); } - nir_push_loop(b); + + nir_loop *loop = nir_push_loop(b); + nir_loop_add_continue_construct(loop); } /** @@ -978,5 +980,8 @@ nir_lower_goto_ifs(nir_shader *shader) progress = true; } + if (progress) + nir_lower_continue_constructs(shader); + return progress; } diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 085f9842475..aa6370544a1 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -1251,8 +1251,11 @@ validate_jump_instr(nir_jump_instr *instr, validate_state *state) validate_assert(state, state->impl->structured); validate_assert(state, state->loop != NULL); if (state->loop) { - nir_block *cont_block = nir_loop_continue_target(state->loop); - validate_assert(state, block->successors[0] == cont_block); + validate_assert(state, 
nir_loop_has_continue_construct(state->loop)); + if (nir_loop_has_continue_construct(state->loop)) { + nir_block *cont_block = nir_loop_first_continue_block(state->loop); + validate_assert(state, block->successors[0] == cont_block); + } } validate_assert(state, block->successors[1] == NULL); validate_assert(state, instr->target == NULL); @@ -1503,14 +1506,13 @@ validate_block(nir_block *block, validate_state *state) if (next == NULL) { switch (state->parent_node->type) { case nir_cf_node_loop: { - if (block == nir_loop_last_block(state->loop)) { - nir_block *cont = nir_loop_continue_target(state->loop); - validate_assert(state, block->successors[0] == cont); + if (!nir_loop_has_continue_construct(state->loop) || + block == nir_loop_last_continue_block(state->loop)) { + nir_block *header = nir_loop_first_block(state->loop); + validate_assert(state, block->successors[0] == header); } else { - validate_assert(state, nir_loop_has_continue_construct(state->loop) && - block == nir_loop_last_continue_block(state->loop)); - nir_block *head = nir_loop_first_block(state->loop); - validate_assert(state, block->successors[0] == head); + nir_block *cont = nir_loop_first_continue_block(state->loop); + validate_assert(state, block->successors[0] == cont); } /* due to the hack for infinite loops, block->successors[1] may * point to the block after the loop. 
@@ -1617,6 +1619,7 @@ validate_loop(nir_loop *loop, validate_state *state) validate_assert(state, next_node->type == nir_cf_node_block); validate_assert(state, !exec_list_is_empty(&loop->body)); + validate_assert(state, nir_loop_first_block(loop)->predecessors.entries <= 2); nir_cf_node *old_parent = state->parent_node; state->parent_node = &loop->cf_node; diff --git a/src/compiler/nir/tests/opt_loop_tests.cpp b/src/compiler/nir/tests/opt_loop_tests.cpp index affa612d3c7..ebf9141ad43 100644 --- a/src/compiler/nir/tests/opt_loop_tests.cpp +++ b/src/compiler/nir/tests/opt_loop_tests.cpp @@ -565,19 +565,20 @@ TEST_F(nir_opt_loop_test, opt_loop_merge_terminators_skip_merge_if_phis_nested_l )")); } -TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_ends_with_jump) +TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_no_work) { + nir_variable *var = nir_variable_create(b->shader, nir_var_shader_temp, + glsl_int_type(), "dummy_var"); + nir_loop *loop = nir_push_loop(b); + /* do_work1() */ + nir_store_var(b, var, nir_imm_int(b, 0), 1); + /* the break we want to move down: */ nir_break_if(b, nir_imm_true(b)); - /* do_work_2: */ - nir_push_if(b, nir_imm_true(b)); - nir_jump(b, nir_jump_continue); - nir_pop_if(b, NULL); - nir_jump(b, nir_jump_return); - + /* No work afterwards. 
*/ nir_pop_loop(b, loop); ASSERT_FALSE(nir_opt_loop(b->shader)); @@ -593,40 +594,32 @@ TEST_F(nir_opt_loop_test, opt_loop_peel_initial_break_ends_with_jump) decl_var shader_out INTERP_MODE_NONE none int out (FRAG_RESULT_DEPTH.x, 0, 0) decl_var ubo INTERP_MODE_NONE none int ubo1 (0, 0, 0) decl_var ubo INTERP_MODE_NONE none int[4] ubo_array (0, 0, 0) + decl_var INTERP_MODE_NONE none int dummy_var decl_function main () (entrypoint) impl main { block b0: // preds: 32 %0 = deref_var &in (shader_in int) 32 %1 = @load_deref (%0) (access=none) - // succs: b1 + // succs: b1 loop { - block b1: // preds: b0 b5 - 1 %2 = load_const (true) - // succs: b2 b3 - if %2 (true) { + block b1: // preds: b0 b4 + 32 %2 = load_const (0x00000000) + 32 %3 = deref_var &dummy_var (shader_temp int) + @store_deref (%3, %2 (0x0)) (wrmask=x, access=none) + 1 %4 = load_const (true) + // succs: b2 b3 + if %4 (true) { block b2:// preds: b1 break - // succs: b8 + // succs: b5 } else { block b3: // preds: b1, succs: b4 } - block b4: // preds: b3 - 1 %3 = load_const (true) - // succs: b5 b6 - if %3 (true) { - block b5:// preds: b4 - continue - // succs: b1 - } else { - block b6: // preds: b4, succs: b7 - } - block b7:// preds: b6 - return - // succs: b9 + block b4: // preds: b3, succs: b1 } - block b8: // preds: b2, succs: b9 - block b9: + block b5: // preds: b2, succs: b6 + block b6: } )")); } diff --git a/src/compiler/spirv/vtn_structured_cfg.c b/src/compiler/spirv/vtn_structured_cfg.c index af8cdd85f83..a1fdc84172a 100644 --- a/src/compiler/spirv/vtn_structured_cfg.c +++ b/src/compiler/spirv/vtn_structured_cfg.c @@ -1676,6 +1676,9 @@ vtn_emit_cf_func_structured(struct vtn_builder *b, struct vtn_function *func, next->nloop = nir_push_loop(&b->nb); nir_store_var(&b->nb, next->continue_var, nir_imm_false(&b->nb), 1); + if (!vtn_is_single_block_loop(next)) + nir_loop_add_continue_construct(next->nloop); + next->nloop->control = vtn_loop_control(b, block->merge[3]); break; diff --git 
a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc index 28f27a0e474..97657ba33ed 100644 --- a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc +++ b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc @@ -10,6 +10,7 @@ #include "compiler/spirv/spirv.h" #include "nir_builder.h" +#include "nir_control_flow.h" #include "nir_deref.h" enum rq_intersection_var_index { @@ -560,8 +561,10 @@ build_ray_traversal(nir_builder *b, nir_deref_instr *rq, nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); nir_store_var(b, incomplete, nir_imm_true(b), 0x1); - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + /* Go up the stack if current_node == VK_BVH_INVALID_NODE */ nir_push_if(b, nir_ieq_imm(b, rq_load(b, rq, current_node), VK_BVH_INVALID_NODE)); { @@ -928,7 +931,7 @@ build_ray_traversal(nir_builder *b, nir_deref_instr *rq, } nir_pop_if(b, NULL); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); return nir_load_var(b, incomplete); } @@ -1035,6 +1038,9 @@ tu_nir_lower_ray_queries(nir_shader *shader) ralloc_free(query_ht); + if (progress) + nir_lower_continue_constructs(shader); + return progress; } diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index df0eddd87d2..edc771ee8f2 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1995,7 +1995,7 @@ ttn_emit_instruction(struct ttn_compile *c) break; case TGSI_OPCODE_BGNLOOP: - nir_push_loop(&c->build); + nir_loop_add_continue_construct(nir_push_loop(&c->build)); break; case TGSI_OPCODE_BRK: @@ -2537,6 +2537,7 @@ ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen) MESA_TRACE_FUNC(); + NIR_PASS(_, nir, nir_lower_continue_constructs); NIR_PASS(_, nir, nir_lower_returns); NIR_PASS(_, nir, nir_lower_vars_to_ssa); NIR_PASS(_, nir, nir_lower_reg_intrinsics_to_ssa); diff --git 
a/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c b/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c index 098c39a40d1..c924092b366 100644 --- a/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c +++ b/src/gallium/frontends/lavapipe/lvp_ray_tracing_pipeline.c @@ -1083,6 +1083,7 @@ lvp_compile_ray_tracing_pipeline(struct lvp_pipeline *pipeline, nir_shader_instructions_pass(b->shader, lvp_lower_ray_tracing_instr, nir_metadata_none, &compiler); + NIR_PASS(_, b->shader, nir_lower_continue_constructs); NIR_PASS(_, b->shader, nir_lower_returns); const struct nir_lower_compute_system_values_options compute_system_values = {0}; diff --git a/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c index cfe7d78e351..e16e587cab9 100644 --- a/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c +++ b/src/gallium/frontends/lavapipe/nir/lvp_nir_lower_ray_queries.c @@ -655,6 +655,7 @@ lvp_nir_lower_ray_queries(struct nir_shader *shader) ralloc_free(query_ht); if (progress) { + NIR_PASS(_, shader, nir_lower_continue_constructs); NIR_PASS(_, shader, nir_lower_global_vars_to_local); NIR_PASS(_, shader, nir_lower_vars_to_ssa); diff --git a/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c b/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c index c06a6b263b8..b752a0503a0 100644 --- a/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c +++ b/src/gallium/frontends/lavapipe/nir/lvp_nir_ray_tracing.c @@ -497,8 +497,10 @@ lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *arg .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0), }; - nir_push_loop(b); + nir_loop *loop = nir_push_loop(b); { + nir_loop_add_continue_construct(loop); + nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), LVP_BVH_INVALID_NODE)); { nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.stack_ptr), 
0)); @@ -607,7 +609,7 @@ lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *arg } nir_pop_if(b, NULL); } - nir_pop_loop(b, NULL); + nir_pop_loop(b, loop); return nir_load_var(b, incomplete); } diff --git a/src/microsoft/compiler/dxil_nir.c b/src/microsoft/compiler/dxil_nir.c index c81aa090bea..1f6b3cc1e4e 100644 --- a/src/microsoft/compiler/dxil_nir.c +++ b/src/microsoft/compiler/dxil_nir.c @@ -2113,10 +2113,10 @@ lower_subgroup_scan(nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_pop_if(b, if_active_thread); nir_store_var(b, loop_counter_var, nir_iadd_imm(b, loop_counter, 1), 1); - nir_jump(b, nir_jump_continue); - nir_pop_if(b, nif); + nir_push_else(b, nif); nir_jump(b, nir_jump_break); + nir_pop_loop(b, loop); result = nir_load_var(b, result_var); diff --git a/src/microsoft/vulkan/dzn_nir.c b/src/microsoft/vulkan/dzn_nir.c index 2c86774cde4..196e82a88a4 100644 --- a/src/microsoft/vulkan/dzn_nir.c +++ b/src/microsoft/vulkan/dzn_nir.c @@ -374,7 +374,8 @@ dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size) * TODO: Might be a good thing to use use the CL compiler we have and turn * those shaders into CL kernels. */ - nir_push_loop(&b); + nir_loop *loop = nir_push_loop(&b); + nir_loop_add_continue_construct(loop); old_index_ptr = nir_load_var(&b, old_index_ptr_var); nir_def *index0 = nir_load_var(&b, index0_var); @@ -445,6 +446,8 @@ dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size) new_index_count_ptr_desc, nir_imm_int(&b, 0), .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4); + nir_lower_continue_constructs(b.shader); + return b.shader; }