diff --git a/src/amd/compiler/instruction_selection/aco_instruction_selection.h b/src/amd/compiler/instruction_selection/aco_instruction_selection.h index 96e1bc310cd..0e2910b6e57 100644 --- a/src/amd/compiler/instruction_selection/aco_instruction_selection.h +++ b/src/amd/compiler/instruction_selection/aco_instruction_selection.h @@ -248,8 +248,8 @@ isel_context setup_isel_context(Program* program, unsigned shader_count, /* aco_isel_cfg.cpp */ void emit_loop_break(isel_context* ctx); void emit_loop_continue(isel_context* ctx); -void begin_loop(isel_context* ctx, loop_context* lc); -void end_loop(isel_context* ctx, loop_context* lc); +void begin_loop(isel_context* ctx); +void end_loop(isel_context* ctx); void begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond); void begin_uniform_if_else(isel_context* ctx, if_context* ic, bool logical_else = true); void end_uniform_if(isel_context* ctx, if_context* ic, bool logical_else = true); diff --git a/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp b/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp index ba816d33f0d..b0c69b87338 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp @@ -116,15 +116,16 @@ update_exec_info(isel_context* ctx) } void -begin_loop(isel_context* ctx, loop_context* lc) +begin_loop(isel_context* ctx) { + loop_context lc; append_logical_end(ctx); ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform; Builder bld(ctx->program, ctx->block); bld.branch(aco_opcode::p_branch); unsigned loop_preheader_idx = ctx->block->index; - lc->loop_exit.kind |= (block_kind_loop_exit | (ctx->block->kind & block_kind_top_level)); + lc.loop_exit.kind |= (block_kind_loop_exit | (ctx->block->kind & block_kind_top_level)); ctx->program->next_loop_depth++; @@ -135,17 +136,18 @@ begin_loop(isel_context* ctx, loop_context* lc) append_logical_start(ctx->block); - lc->cf_info_old = ctx->cf_info; - lc->header_idx = loop_header->index; + lc.cf_info_old = ctx->cf_info; + lc.header_idx = loop_header->index; ctx->cf_info.parent_loop = {false, false}; ctx->cf_info.parent_if.is_divergent = false; + ctx->loop_stack.push_back(std::move(lc)); /* Never enter a loop with empty exec mask. */ assert(!ctx->cf_info.exec.empty()); } void -end_loop(isel_context* ctx, loop_context* lc) +end_loop(isel_context* ctx) { /* No need to check exec.potentially_empty_break/continue originating inside the loop. In the * only case where it's possible at this point (divergent break after divergent continue), we @@ -153,7 +155,8 @@ end_loop(isel_context* ctx, loop_context* lc) * divergent control flow requires WQM. */ assert(!ctx->cf_info.exec.potentially_empty_discard); - Block& header = ctx->program->blocks[lc->header_idx]; + loop_context& lc = ctx->loop_stack.back(); + Block& header = ctx->program->blocks[lc.header_idx]; /* Add the trivial continue. */ if (!ctx->cf_info.has_branch) { @@ -176,14 +179,16 @@ end_loop(isel_context* ctx, loop_context* lc) /* emit loop successor block */ ctx->program->next_loop_depth--; - ctx->block = ctx->program->insert_block(std::move(lc->loop_exit)); + ctx->block = ctx->program->insert_block(std::move(lc.loop_exit)); append_logical_start(ctx->block); /* Propagate information about discards and restore previous CF info. */ - lc->cf_info_old.exec.potentially_empty_discard |= ctx->cf_info.exec.potentially_empty_discard; - lc->cf_info_old.had_divergent_discard |= ctx->cf_info.had_divergent_discard; - ctx->cf_info = lc->cf_info_old; + lc.cf_info_old.exec.potentially_empty_discard |= ctx->cf_info.exec.potentially_empty_discard; + lc.cf_info_old.had_divergent_discard |= ctx->cf_info.had_divergent_discard; + ctx->cf_info = lc.cf_info_old; update_exec_info(ctx); + + ctx->loop_stack.pop_back(); } void diff --git a/src/amd/compiler/instruction_selection/aco_select_nir.cpp b/src/amd/compiler/instruction_selection/aco_select_nir.cpp index 637e20482ec..c1289d12a92 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir.cpp @@ -942,16 +942,14 @@ visit_loop(isel_context* ctx, nir_loop* loop) { assert(!nir_loop_has_continue_construct(loop)); - ctx->loop_stack.push_back(loop_context()); - begin_loop(ctx, &ctx->loop_stack.back()); + begin_loop(ctx); ctx->cf_info.parent_loop.has_divergent_break = loop->divergent_break && nir_block_num_preds(nir_loop_first_block(loop)) > 1; ctx->cf_info.in_divergent_cf |= ctx->cf_info.parent_loop.has_divergent_break; visit_cf_list(ctx, &loop->body); - end_loop(ctx, &ctx->loop_stack.back()); - ctx->loop_stack.pop_back(); + end_loop(ctx); } void diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index 49840821d46..c8675e6245f 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -3773,8 +3773,7 @@ pops_await_overlapped_waves(isel_context* ctx) /* Await the overlapped waves. */ - ctx->loop_stack.push_back(loop_context()); - begin_loop(ctx, &ctx->loop_stack.back()); + begin_loop(ctx); bld.reset(ctx->block); const Temp exiting_wave_id = bld.pseudo(aco_opcode::p_pops_gfx9_add_exiting_wave_id, bld.def(s1), @@ -3795,7 +3794,7 @@ pops_await_overlapped_waves(isel_context* ctx) /* Sleep before rechecking to let overlapped waves run for some time. */ bld.sopp(aco_opcode::s_sleep, ctx->program->gfx_level >= GFX10 ? UINT16_MAX : 3); - end_loop(ctx, &ctx->loop_stack.back()); + end_loop(ctx); bld.reset(ctx->block); /* Indicate the wait has been done to subsequent compilation stages. */ diff --git a/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp b/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp index d93dca99d5c..59081207715 100644 --- a/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_trap_handler.cpp @@ -143,8 +143,7 @@ dump_vgprs_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) Operand(PhysReg{1}, s1) /* SRC0 mode */); } - ctx->loop_stack.push_back(loop_context()); - begin_loop(ctx, &ctx->loop_stack.back()); + begin_loop(ctx); { bld.reset(ctx->block); @@ -177,7 +176,7 @@ dump_vgprs_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) } end_uniform_if(ctx, &loop_break); } - end_loop(ctx, &ctx->loop_stack.back()); + end_loop(ctx); bld.reset(ctx->block); if (ctx->program->gfx_level < GFX10) { @@ -240,8 +239,7 @@ dump_lds_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) Operand m = load_lds_size_m0(bld); - ctx->loop_stack.push_back(loop_context()); - begin_loop(ctx, &ctx->loop_stack.back()); + begin_loop(ctx); { bld.reset(ctx->block); @@ -274,7 +272,7 @@ dump_lds_to_mem(isel_context* ctx, Builder& bld, Operand rsrc) } end_uniform_if(ctx, &loop_break); } - end_loop(ctx, &ctx->loop_stack.back()); + end_loop(ctx); bld.reset(ctx->block); } begin_uniform_if_else(ctx, &ic);