aco/isel: Don't emit ELSE side of divergent branches which jump

Totals from 50 (0.06% of 84383) affected shaders: (Navi48)

Instrs: 402490 -> 402444 (-0.01%); split: -0.01%, +0.00%
CodeSize: 2239024 -> 2238864 (-0.01%); split: -0.01%, +0.00%
SpillSGPRs: 1493 -> 1496 (+0.20%)
Latency: 5836785 -> 5836747 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 1120893 -> 1120909 (+0.00%); split: -0.00%, +0.00%
Copies: 46128 -> 46082 (-0.10%)
VALU: 222708 -> 222715 (+0.00%); split: -0.00%, +0.00%
SALU: 53039 -> 52993 (-0.09%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39519>
This commit is contained in:
Daniel Schürmann 2026-01-21 14:09:52 +01:00 committed by Marge Bot
parent ba32219cf8
commit fbf2083b8f
4 changed files with 78 additions and 69 deletions

View file

@ -1171,14 +1171,14 @@ validate_cfg(Program* program)
"logical successors must be sorted", &block);
/* critical edges are not allowed */
if (block.linear_preds.size() > 1) {
if (block.linear_preds.size() > 1)
for (unsigned pred : block.linear_preds)
check_block(program->blocks[pred].linear_succs.size() == 1,
"linear critical edges are not allowed", &program->blocks[pred]);
if (block.logical_preds.size() > 1)
for (unsigned pred : block.logical_preds)
check_block(program->blocks[pred].logical_succs.size() == 1,
"logical critical edges are not allowed", &program->blocks[pred]);
}
}
return is_valid;

View file

@ -352,16 +352,17 @@ void
begin_divergent_if_else(isel_context* ctx, if_context* ic, nir_selection_control sel_ctrl)
{
Block* BB_then_logical = ctx->block;
if (!ctx->cf_info.has_divergent_branch) {
append_logical_end(ctx);
add_logical_edge(BB_then_logical->index, &ic->BB_endif);
}
if (ctx->cf_info.has_divergent_branch)
return;
append_logical_end(ctx);
/* branch from logical then block to invert block */
aco_ptr<Instruction> branch;
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
BB_then_logical->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then_logical->index, &ic->BB_invert);
add_logical_edge(BB_then_logical->index, &ic->BB_endif);
BB_then_logical->kind |= block_kind_uniform;
assert(!ctx->cf_info.has_branch);
ctx->cf_info.has_divergent_branch = false;
@ -420,12 +421,16 @@ end_divergent_if(isel_context* ctx, if_context* ic)
ctx->program->next_divergent_if_logical_depth--;
assert(!ctx->cf_info.has_branch);
ctx->cf_info.has_divergent_branch = false;
/** emit linear else block */
Block* BB_else_linear = ctx->program->create_and_insert_block();
BB_else_linear->kind |= block_kind_uniform;
add_linear_edge(ic->invert_idx, BB_else_linear);
if (ctx->cf_info.has_divergent_branch) {
add_linear_edge(ic->BB_if_idx, BB_else_linear);
add_logical_edge(ic->BB_if_idx, &ic->BB_endif);
} else {
add_linear_edge(ic->invert_idx, BB_else_linear);
}
/* branch from linear else block to endif block */
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
@ -436,6 +441,7 @@ end_divergent_if(isel_context* ctx, if_context* ic)
ctx->block = ctx->program->insert_block(std::move(ic->BB_endif));
append_logical_start(ctx->block);
ctx->cf_info.has_divergent_branch = false;
ctx->cf_info.parent_if = ic->cf_info_old.parent_if;
ctx->cf_info.had_divergent_discard |= ic->cf_info_old.had_divergent_discard;
ctx->cf_info.in_divergent_cf = ic->cf_info_old.in_divergent_cf ||

View file

@ -1008,7 +1008,17 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
* \ /
* BB_ENDIF
*
* *) Exceptions may be due to break and continue statements within loops
*
* Exceptions may be due to break and continue statements within loops:
*
* The linear CFG:
*                       BB_IF
*                      /     \
*        BB_THEN (logical)    \
*         /        \           \
*    BB_JUMP   BB_CONTINUE   BB_ELSE (all linear)
*                   \         /
*                    BB_ENDIF
**/
begin_divergent_if_then(ctx, &ic, cond, if_stmt->control);

View file

@ -339,10 +339,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.divergent_break)
nir_push_else(nb, NULL);
{
/* The contents of this branch are moved to the merge block. */
//>> BB7
//! /* logical preds: BB1, / linear preds: BB6, / kind: uniform, */
//>> BB9
//! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, break, merge, */
//>> BB6
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, break, merge, */
//! p_logical_start
//! s1: %_ = p_unit_test 5
//! p_logical_end
@ -500,10 +498,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.mixed_break)
nir_push_else(nb, NULL);
{
/* The contents of this branch are moved to the merge block. */
//>> BB9
//! /* logical preds: BB3, / linear preds: BB8, / kind: uniform, */
//>> BB11
//! /* logical preds: BB9, / linear preds: BB9, BB10, / kind: uniform, break, merge, */
//>> BB8
//! /* logical preds: BB3, / linear preds: BB6, BB7, / kind: uniform, break, merge, */
//! p_logical_start
//! s1: %_ = p_unit_test 6
nir_unit_test_uniform_input(nb, 1, 32, .base=6);
@ -520,8 +516,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.mixed_break)
nir_phi_instr_add_src(phi[1], nir_loop_last_block(loop), cont1);
}
nir_pop_loop(nb, NULL);
//>> BB12
//! /* logical preds: BB2, BB4, BB11, / linear preds: BB2, BB5, BB11, / kind: uniform, top-level, loop-exit, */
//>> BB9
//! /* logical preds: BB2, BB4, BB8, / linear preds: BB2, BB5, BB8, / kind: uniform, top-level, loop-exit, */
nb->cursor = nir_after_phis(nir_loop_first_block(loop));
nir_builder_instr_insert(nb, &phi[0]->instr);
@ -613,10 +609,8 @@ BEGIN_TEST(isel.cf.unreachable_continue.nested_mixed_break)
nir_push_else(nb, NULL);
{
/* The contents of this branch are moved to the merge block. */
//>> BB11
//! /* logical preds: BB5, / linear preds: BB10, / kind: uniform, */
//>> BB13
//! /* logical preds: BB11, / linear preds: BB11, BB12, / kind: uniform, break, merge, */
//>> BB10
//! /* logical preds: BB5, / linear preds: BB8, BB9, / kind: uniform, break, merge, */
nir_jump(nb, nir_jump_break);
}
nir_pop_if(nb, NULL);
@ -698,8 +692,8 @@ BEGIN_TEST(isel.cf.divergent_if_branch_use)
nir_push_else(nb, NULL);
{
/* The contents of this branch are moved to the merge block. */
//>> BB9
//! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, continue, merge, */
//>> BB6
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, continue, merge, */
//! p_logical_start
//! s1: %val = p_unit_test 0
val = nir_unit_test_uniform_input(nb, 1, 32, .base=0);
@ -746,18 +740,18 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use)
nir_def *val;
nir_push_if(nb, nir_unit_test_uniform_input(nb, 1, 1, .base=2));
{
//>> BB10
//! /* logical preds: BB9, / linear preds: BB9, / kind: break, */
//>> BB7
//! /* logical preds: BB6, / linear preds: BB6, / kind: break, */
nir_jump(nb, nir_jump_break);
}
nir_push_else(nb, NULL);
{
/* The contents of this branch are moved to the merge block. */
//>> BB14
//! /* logical preds: BB13, / linear preds: BB12, BB13, / kind: uniform, */
//>> BB11
//! /* logical preds: BB10, / linear preds: BB9, BB10, / kind: uniform, */
//>> p_cbranch_z %0:exec rarely_taken
//! BB15
//! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */
//! BB12
//! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */
//! p_logical_start
//! s1: %val = p_unit_test 0
val = nir_unit_test_uniform_input(nb, 1, 32, .base=0);
@ -767,8 +761,8 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use)
//! p_unit_test 1, %val
nir_unit_test_output(nb, val, .base=1);
//>> BB17
//! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, continue, */
//>> BB14
//! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */
}
nir_pop_loop(nb, NULL);
@ -798,7 +792,7 @@ BEGIN_TEST(isel.cf.hidden_continue)
nir_loop* loop = nir_push_loop(nb);
{
//>> BB1
//! /* logical preds: BB0, BB2, / linear preds: BB0, BB3, BB11, / kind: loop-header, branch, */
//! /* logical preds: BB0, BB2, / linear preds: BB0, BB3, BB8, / kind: loop-header, branch, */
//! s1: %2 = p_linear_phi %init, %cont, %phi
phi = nir_phi_instr_create(nb->shader);
nir_def_init(&phi->instr, &phi->def, 1, 32);
@ -816,13 +810,11 @@ BEGIN_TEST(isel.cf.hidden_continue)
}
nir_pop_if(nb, NULL);
//>> BB6
//! /* logical preds: / linear preds: BB4, BB5, / kind: invert, */
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: break, merge, */
//! s1: %phi = p_linear_phi %cont, s1: undef
//>> BB9
//! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: break, merge, */
//>> BB11
//! /* logical preds: / linear preds: BB9, / kind: uniform, continue, */
//>> BB8
//! /* logical preds: / linear preds: BB6, / kind: uniform, continue, */
nir_jump(nb, nir_jump_break);
}
nir_pop_loop(nb, NULL);
@ -910,14 +902,14 @@ END_TEST
* b = phi(a);
* }
*/
BEGIN_TEST(isel.cf.divergent_if_undef.break)
BEGIN_TEST(isel.cf.divergent_if_phi.break)
if (!setup_nir_cs(GFX11))
return;
nir_push_loop(nb);
{
//>> BB1
//! /* logical preds: BB0, BB9, / linear preds: BB0, BB9, / kind: loop-header, branch, */
//! /* logical preds: BB0, BB6, / linear preds: BB0, BB6, / kind: loop-header, branch, */
//! p_logical_start
//! s1: %val = p_unit_test 0
//! s2: %_ = p_unit_test 2
@ -931,10 +923,11 @@ BEGIN_TEST(isel.cf.divergent_if_undef.break)
nir_push_else(nb, NULL);
{}
nir_pop_if(nb, NULL);
/* As the ELSE gets omitted, the logical predecessor dominates both linear predecessors. */
//>> BB9
//! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, continue, merge, */
//! s1: %phi = p_linear_phi %val, s1: undef
//>> BB6
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: uniform, continue, merge, */
//! s1: %phi = p_linear_phi %val, %val
nir_phi_instr* phi = nir_phi_instr_create(nb->shader);
nir_phi_instr_add_src(phi, nir_if_last_else_block(nif), val);
nir_def_init(&phi->instr, &phi->def, 1, 32);
@ -1165,24 +1158,24 @@ BEGIN_TEST(isel.cf.empty_exec.loop_break)
//>> BB3
//! /* logical preds: BB2, / linear preds: BB2, / kind: break, */
nir_break_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 2));
//>> BB10
//! /* logical preds: BB8, / linear preds: BB8, BB9, / kind: uniform, merge, */
//>> BB7
//! /* logical preds: BB2, / linear preds: BB5, BB6, / kind: uniform, merge, */
//>> p_cbranch_z %0:exec rarely_taken
//>> BB11
//>> BB8
//>> p_unit_test 3, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 3);
}
nir_pop_if(nb, NULL);
//>> BB18
//! /* logical preds: BB13, BB16, / linear preds: BB16, BB17, / kind: uniform, continue, merge, */
//>> BB15
//! /* logical preds: BB10, BB13, / linear preds: BB13, BB14, / kind: uniform, continue, merge, */
//! p_logical_start
//! p_unit_test 4, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4);
}
nir_pop_loop(nb, NULL);
//>> BB19
//>> BB16
//! /* logical preds: BB3, / linear preds: BB4, / kind: uniform, top-level, loop-exit, */
//! p_logical_start
@ -1225,24 +1218,24 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue)
nir_jump(nb, nir_jump_continue);
}
nir_pop_if(nb, NULL);
//>> BB12
//! /* logical preds: BB10, / linear preds: BB10, BB11, / kind: uniform, merge, */
//>> BB9
//! /* logical preds: BB4, / linear preds: BB7, BB8, / kind: uniform, merge, */
//>> p_cbranch_z %0:exec rarely_taken
//>> BB13
//>> BB10
//>> p_unit_test 3, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 3);
}
nir_pop_if(nb, NULL);
//>> BB20
//! /* logical preds: BB15, BB18, / linear preds: BB18, BB19, / kind: uniform, continue, merge, */
//>> BB17
//! /* logical preds: BB12, BB15, / linear preds: BB15, BB16, / kind: uniform, continue, merge, */
//! p_logical_start
//! p_unit_test 4, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4);
}
nir_pop_loop(nb, NULL);
//>> BB21
//>> BB18
//! /* logical preds: BB2, / linear preds: BB2, / kind: uniform, top-level, loop-exit, */
//! p_logical_start
@ -1270,7 +1263,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break)
nir_push_loop(nb);
{
//>> BB1
//! /* logical preds: BB0, BB2, BB20, / linear preds: BB0, BB3, BB20, / kind: loop-header, branch, */
//! /* logical preds: BB0, BB2, BB14, / linear preds: BB0, BB3, BB14, / kind: loop-header, branch, */
//>> p_unit_test 0, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 0);
@ -1283,30 +1276,30 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break)
}
nir_pop_if(nb, NULL);
//>> BB9
//! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: branch, merge, */
//>> BB6
//! /* logical preds: BB1, / linear preds: BB4, BB5, / kind: branch, merge, */
//>> p_unit_test 2, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 2);
//>> s2: %_ = p_unit_test 3
//>> BB10
//! /* logical preds: BB9, / linear preds: BB9, / kind: break, */
//>> BB7
//! /* logical preds: BB6, / linear preds: BB6, / kind: break, */
nir_break_if(nb, nir_unit_test_divergent_input(nb, 1, 1, .base = 3));
//>> BB17
//! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, merge, */
//>> BB11
//! /* logical preds: BB6, / linear preds: BB9, BB10, / kind: uniform, merge, */
//>> p_cbranch_z %0:exec rarely_taken
//>> BB18
//! /* logical preds: BB17, / linear preds: BB17, / kind: uniform, */
//>> BB12
//! /* logical preds: BB11, / linear preds: BB11, / kind: uniform, */
//>> p_unit_test 4, %_
nir_unit_test_output(nb, nir_undef(nb, 1, 32), .base = 4);
//>> BB20
//! /* logical preds: BB18, / linear preds: BB18, BB19, / kind: uniform, continue, */
//>> BB14
//! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, continue, */
}
nir_pop_loop(nb, NULL);
//>> BB21
//! /* logical preds: BB10, / linear preds: BB11, / kind: uniform, top-level, loop-exit, */
//>> BB15
//! /* logical preds: BB7, / linear preds: BB8, / kind: uniform, top-level, loop-exit, */
//! p_logical_start
//! p_unit_test 5, %_