diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 21260f883ac..0a4c80dab6e 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -1171,14 +1171,14 @@ validate_cfg(Program* program) "logical successors must be sorted", &block); /* critical edges are not allowed */ - if (block.linear_preds.size() > 1) { + if (block.linear_preds.size() > 1) for (unsigned pred : block.linear_preds) check_block(program->blocks[pred].linear_succs.size() == 1, "linear critical edges are not allowed", &program->blocks[pred]); + if (block.logical_preds.size() > 1) for (unsigned pred : block.logical_preds) check_block(program->blocks[pred].logical_succs.size() == 1, "logical critical edges are not allowed", &program->blocks[pred]); - } } return is_valid; diff --git a/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp b/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp index af50c33ee45..a2a323ceb0e 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_cfg.cpp @@ -352,16 +352,17 @@ void begin_divergent_if_else(isel_context* ctx, if_context* ic, nir_selection_control sel_ctrl) { Block* BB_then_logical = ctx->block; - if (!ctx->cf_info.has_divergent_branch) { - append_logical_end(ctx); - add_logical_edge(BB_then_logical->index, &ic->BB_endif); - } + if (ctx->cf_info.has_divergent_branch) + return; + + append_logical_end(ctx); /* branch from logical then block to invert block */ aco_ptr branch; branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); BB_then_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_logical->index, &ic->BB_invert); + add_logical_edge(BB_then_logical->index, &ic->BB_endif); BB_then_logical->kind |= block_kind_uniform; assert(!ctx->cf_info.has_branch); ctx->cf_info.has_divergent_branch = false; @@ -420,12 +421,16 @@ end_divergent_if(isel_context* ctx, if_context* ic) ctx->program->next_divergent_if_logical_depth--; assert(!ctx->cf_info.has_branch); - ctx->cf_info.has_divergent_branch = false; /** emit linear else block */ Block* BB_else_linear = ctx->program->create_and_insert_block(); BB_else_linear->kind |= block_kind_uniform; - add_linear_edge(ic->invert_idx, BB_else_linear); + if (ctx->cf_info.has_divergent_branch) { + add_linear_edge(ic->BB_if_idx, BB_else_linear); + add_logical_edge(ic->BB_if_idx, &ic->BB_endif); + } else { + add_linear_edge(ic->invert_idx, BB_else_linear); + } /* branch from linear else block to endif block */ branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); @@ -436,6 +441,7 @@ end_divergent_if(isel_context* ctx, if_context* ic) ctx->block = ctx->program->insert_block(std::move(ic->BB_endif)); append_logical_start(ctx->block); + ctx->cf_info.has_divergent_branch = false; ctx->cf_info.parent_if = ic->cf_info_old.parent_if; ctx->cf_info.had_divergent_discard |= ic->cf_info_old.had_divergent_discard; ctx->cf_info.in_divergent_cf = ic->cf_info_old.in_divergent_cf || diff --git a/src/amd/compiler/instruction_selection/aco_select_nir.cpp b/src/amd/compiler/instruction_selection/aco_select_nir.cpp index 03019d3b667..e10c5900590 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir.cpp @@ -1008,7 +1008,17 @@ visit_if(isel_context* ctx, nir_if* if_stmt) * \ / * BB_ENDIF * - * *) Exceptions may be due to break and continue statements within loops + * + * Exceptions may be due to break and continue statements within loops: + * + * The linear CFG: + * BB_IF + * / \ + * BB_THEN (logical) \ + * / \ \ + * BB_JUMP BB_CONTINUE BB_ELSE (all linear) + * \ / + * BB_ENDIF **/ begin_divergent_if_then(ctx, &ic, cond, if_stmt->control); diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index 2d9604f78c8..eab9baabc28 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -339,10 +339,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.divergent_break) nir_push_else(nb, NULL); { /* The contents of this branch is moved to the merge block. */ - //>> BB7 - //! /* logical preds: BB1, / linear preds: BB6, / kind: uniform, */ - //>> BB9 - //! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, break, merge, */ + //>> BB6 + //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, break, merge, */ //! p_logical_start //! s1: %_ = p_unit_test 5 //! p_logical_end @@ -500,10 +498,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.mixed_break) nir_push_else(nb, NULL); { /* The contents of this branch is moved to the merge block. */ - //>> BB9 - //! /* logical preds: BB3, / linear preds: BB8, / kind: uniform, */ - //>> BB11 - //! /* logical preds: BB9, / linear preds: BB9, BB10, / kind: uniform, break, merge, */ + //>> BB8 + //! /* logical preds: BB3, / linear preds: BB6, BB7, / kind: uniform, break, merge, */ //! p_logical_start //! s1: %_ = p_unit_test 6 nir_unit_test_uniform_input(nb, 1, 32, .base=6); @@ -520,8 +516,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.mixed_break) nir_phi_instr_add_src(phi[1], nir_loop_last_block(loop), cont1); } nir_pop_loop(nb, NULL); - //>> BB12 - //! /* logical preds: BB2, BB4, BB11, / linear preds: BB2, BB5, BB11, / kind: uniform, top-level, loop-exit, */ + //>> BB9 + //! /* logical preds: BB2, BB4, BB8, / linear preds: BB2, BB5, BB8, / kind: uniform, top-level, loop-exit, */ nb->cursor = nir_after_phis(nir_loop_first_block(loop)); nir_builder_instr_insert(nb, &phi[0]->instr); @@ -613,10 +609,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.nested_mixed_break) nir_push_else(nb, NULL); { /* The contents of this branch is moved to the merge block. */ - //>> BB11 - //! /* logical preds: BB5, / linear preds: BB10, / kind: uniform, */ - //>> BB13 - //! /* logical preds: BB11, / linear preds: BB11, BB12, / kind: uniform, break, merge, */ + //>> BB10 + //! /* logical preds: BB5, / linear preds: BB8, BB9, / kind: uniform, break, merge, */ nir_jump(nb, nir_jump_break); } nir_pop_if(nb, NULL); @@ -698,8 +692,8 @@ BEGIN_TEST(isel.cf.divergent_if_branch_use) nir_push_else(nb, NULL); { /* The contents of this branch is moved to the merge block. */ - //>> BB9 - //! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, continue, merge, */ + //>> BB6 + //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, continue, merge, */ //! p_logical_start //! s1: %val = p_unit_test 0 val = nir_unit_test_uniform_input(nb, 1, 32, .base=0); @@ -746,18 +740,18 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use) nir_def *val; nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2)); { - //>> BB10 - //! /* logical preds: BB9, / linear preds: BB9, / kind: break, */ + //>> BB7 + //! /* logical preds: BB6, / linear preds: BB6, / kind: break, */ nir_jump(nb, nir_jump_break); } nir_push_else(nb, NULL); { /* The contents of this branch is moved to the merge block. */ - //>> BB14 - //! /* logical preds: BB13, / linear preds: BB12, BB13, / kind: uniform, */ + //>> BB11 + //! /* logical preds: BB10, / linear preds: BB9, BB10, / kind: uniform, */ //>> p_cbranch_z %0:exec rarely_taken - //! BB15 - //! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */ + //! BB12 + //! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */ //! p_logical_start //! s1: %val = p_unit_test 0 val = nir_unit_test_uniform_input(nb, 1, 32, .base=0); @@ -767,8 +761,8 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use) //! p_unit_test 1, %val nir_unit_test_output(nb, val, .base=1); - //>> BB17 - //! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, continue, */ + //>> BB14 + //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */ } nir_pop_loop(nb, NULL); @@ -798,7 +792,7 @@ BEGIN_TEST(isel.cf.hidden_continue) nir_loop* loop = nir_push_loop(nb); { //>> BB1 - //! /* logical preds: BB0, BB2, / linear preds: BB0, BB3, BB11, / kind: loop-header, branch, */ + //! /* logical preds: BB0, BB2, / linear preds: BB0, BB3, BB8, / kind: loop-header, branch, */ //! s1: %2 = p_linear_phi %init, %cont, %phi phi = nir_phi_instr_create(nb->shader); nir_def_init(&phi->instr, &phi->def, 1, 32); @@ -816,13 +810,11 @@ BEGIN_TEST(isel.cf.hidden_continue) } nir_pop_if(nb, NULL); //>> BB6 - //! /* logical preds: / linear preds: BB4, BB5, / kind: invert, */ + //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: break, merge, */ //! s1: %phi = p_linear_phi %cont, s1: undef - //>> BB9 - //! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: break, merge, */ - //>> BB11 - //! /* logical preds: / linear preds: BB9, / kind: uniform, continue, */ + //>> BB8 + //! /* logical preds: / linear preds: BB6, / kind: uniform, continue, */ nir_jump(nb, nir_jump_break); } nir_pop_loop(nb, NULL); @@ -910,14 +902,14 @@ END_TEST * b = phi(a); * } */ -BEGIN_TEST(isel.cf.divergent_if_undef.break) +BEGIN_TEST(isel.cf.divergent_if_phi.break) if (!setup_nir_cs(GFX11)) return; nir_push_loop(nb); { //>> BB1 - //! /* logical preds: BB0, BB9, / linear preds: BB0, BB9, / kind: loop-header, branch, */ + //! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: loop-header, branch, */ //! p_logical_start //! s1: %val = p_unit_test 0 //! s2: %_ = p_unit_test 2 @@ -931,10 +923,11 @@ BEGIN_TEST(isel.cf.divergent_if_undef.break) nir_push_else(nb, NULL); {} nir_pop_if(nb, NULL); + /* As the ELSE gets omitted, the logical predecessor dominates both linear predecessors. */ - //>> BB9 - //! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, continue, merge, */ - //! s1: %phi = p_linear_phi %val, s1: undef + //>> BB6 + //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, continue, merge, */ + //! s1: %phi = p_linear_phi %val, %val nir_phi_instr* phi = nir_phi_instr_create(nb->shader); nir_phi_instr_add_src(phi, nir_if_last_else_block(nif), val); nir_def_init(&phi->instr, &phi->def, 1, 32); @@ -1165,24 +1158,24 @@ BEGIN_TEST(isel.cf.empty_exec.loop_break) //>> BB3 //! /* logical preds: BB2, / linear preds: BB2, / kind: break, */ nir_break_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 2)); - //>> BB10 - //! /* logical preds: BB8, / linear preds: BB8, BB9, / kind: uniform, merge, */ + //>> BB7 + //! /* logical preds: BB2, / linear preds: BB5, BB6, / kind: uniform, merge, */ //>> p_cbranch_z %0:exec rarely_taken - //>> BB11 + //>> BB8 //>> p_unit_test 3, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 3); } nir_pop_if(nb, NULL); - //>> BB18 - //! /* logical preds: BB13, BB16, / linear preds: BB16, BB17, / kind: uniform, continue, merge, */ + //>> BB15 + //! /* logical preds: BB10, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */ //! p_logical_start //! p_unit_test 4, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4); } nir_pop_loop(nb, NULL); - //>> BB19 + //>> BB16 //! /* logical preds: BB3, / linear preds: BB4, / kind: uniform, top-level, loop-exit, */ //! p_logical_start @@ -1225,24 +1218,24 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue) nir_jump(nb, nir_jump_continue); } nir_pop_if(nb, NULL); - //>> BB12 - //! /* logical preds: BB10, / linear preds: BB10, BB11, / kind: uniform, merge, */ + //>> BB9 + //! /* logical preds: BB4, / linear preds: BB7, BB8, / kind: uniform, merge, */ //>> p_cbranch_z %0:exec rarely_taken - //>> BB13 + //>> BB10 //>> p_unit_test 3, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 3); } nir_pop_if(nb, NULL); - //>> BB20 - //! /* logical preds: BB15, BB18, / linear preds: BB18, BB19, / kind: uniform, continue, merge, */ + //>> BB17 + //! /* logical preds: BB12, BB15, / linear preds: BB15, BB16, / kind: uniform, continue, merge, */ //! p_logical_start //! p_unit_test 4, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4); } nir_pop_loop(nb, NULL); - //>> BB21 + //>> BB18 //! /* logical preds: BB2, / linear preds: BB2, / kind: uniform, top-level, loop-exit, */ //! p_logical_start @@ -1270,7 +1263,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break) nir_push_loop(nb); { //>> BB1 - //! /* logical preds: BB0, BB2, BB20, / linear preds: BB0, BB3, BB20, / kind: loop-header, branch, */ + //! /* logical preds: BB0, BB2, BB14, / linear preds: BB0, BB3, BB14, / kind: loop-header, branch, */ //>> p_unit_test 0, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 0); @@ -1283,30 +1276,30 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break) } nir_pop_if(nb, NULL); - //>> BB9 - //! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: branch, merge, */ + //>> BB6 + //! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: branch, merge, */ //>> p_unit_test 2, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 2); //>> s2: %_ = p_unit_test 3 - //>> BB10 - //! /* logical preds: BB9, / linear preds: BB9, / kind: break, */ + //>> BB7 + //! /* logical preds: BB6, / linear preds: BB6, / kind: break, */ nir_break_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 3)); - //>> BB17 - //! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, merge, */ + //>> BB11 + //! /* logical preds: BB6, / linear preds: BB9, BB10, / kind: uniform, merge, */ //>> p_cbranch_z %0:exec rarely_taken - //>> BB18 - //! /* logical preds: BB17, / linear preds: BB17, / kind: uniform, */ + //>> BB12 + //! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */ //>> p_unit_test 4, %_ nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4); - //>> BB20 - //! /* logical preds: BB18, / linear preds: BB18, BB19, / kind: uniform, continue, */ + //>> BB14 + //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */ } nir_pop_loop(nb, NULL); - //>> BB21 - //! /* logical preds: BB10, / linear preds: BB11, / kind: uniform, top-level, loop-exit, */ + //>> BB15 + //! /* logical preds: BB7, / linear preds: BB8, / kind: uniform, top-level, loop-exit, */ //! p_logical_start //! p_unit_test 5, %_