mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 11:20:20 +01:00
aco: skip code if exec is empty
This is safer and potentially faster. fossil-db (navi21): Totals from 690 (0.87% of 79395) affected shaders: Instrs: 4534778 -> 4535916 (+0.03%) CodeSize: 25268516 -> 25272080 (+0.01%); split: -0.00%, +0.01% Latency: 48482721 -> 48513907 (+0.06%); split: -0.00%, +0.07% InvThroughput: 13213965 -> 13217828 (+0.03%); split: -0.00%, +0.03% Copies: 432307 -> 432295 (-0.00%); split: -0.05%, +0.04% Branches: 187305 -> 188249 (+0.50%) VALU: 2904490 -> 2904508 (+0.00%); split: -0.00%, +0.00% SALU: 674962 -> 675133 (+0.03%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31143>
This commit is contained in:
parent
f00c3a14c0
commit
f35e229fae
4 changed files with 165 additions and 51 deletions
|
|
@ -56,6 +56,7 @@ This repairs SSA in the case of mismatches between the logical and linear CFG, w
|
|||
|
||||
Instruction selection might create mismatches between the logical CFG (the input NIR's CFG) and the linear CFG in the following situations:
|
||||
- We add a break at the end of a loop in case it has no active invocations (an empty exec can prevent any logical breaks from being taken). This creates a linear edge but no logical edge, and SGPR uses outside the loop can require a phi.
|
||||
- We add an empty exec skip over a block. This is a branch which skips most contents of a sequence of instructions if exec is empty. To avoid critical edges, the inside of the construct logically dominates the merge but not linearly.
|
||||
- An SGPR is defined in one side of a divergent IF but it used in or after the merge block. If the other side of the IF ends in a branch, a phi is not necessary according to the logical CFG, but it is for the linear CFG. However, `sanitize_cf_list()` should already resolve this before translation from NIR for additional reasons.
|
||||
|
||||
#### Lower Phis
|
||||
|
|
|
|||
|
|
@ -50,22 +50,6 @@ _isel_err(isel_context* ctx, const char* file, unsigned line, const nir_instr* i
|
|||
free(out);
|
||||
}
|
||||
|
||||
struct if_context {
|
||||
Temp cond;
|
||||
|
||||
bool divergent_old;
|
||||
bool had_divergent_discard_old;
|
||||
bool had_divergent_discard_then;
|
||||
bool has_divergent_continue_old;
|
||||
bool has_divergent_continue_then;
|
||||
struct exec_info exec_old;
|
||||
|
||||
unsigned BB_if_idx;
|
||||
unsigned invert_idx;
|
||||
Block BB_invert;
|
||||
Block BB_endif;
|
||||
};
|
||||
|
||||
struct loop_context {
|
||||
Block loop_exit;
|
||||
|
||||
|
|
@ -8115,6 +8099,10 @@ visit_cmat_muladd(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
emit_split_vector(ctx, dst, instr->def.num_components);
|
||||
}
|
||||
|
||||
static void begin_empty_exec_skip(isel_context* ctx, nir_instr* instr, nir_block* block);
|
||||
|
||||
static void end_empty_exec_skip(isel_context* ctx);
|
||||
|
||||
void
|
||||
visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
|
|
@ -8995,11 +8983,13 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
}
|
||||
|
||||
bld.pseudo(aco_opcode::p_discard_if, cond);
|
||||
|
||||
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec.potentially_empty_discard = true;
|
||||
ctx->cf_info.had_divergent_discard |= in_exec_divergent_or_in_loop(ctx);
|
||||
ctx->block->kind |= block_kind_uses_discard;
|
||||
|
||||
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent) {
|
||||
ctx->cf_info.exec.potentially_empty_discard = true;
|
||||
begin_empty_exec_skip(ctx, &instr->instr, instr->instr.block);
|
||||
}
|
||||
ctx->cf_info.had_divergent_discard |= in_exec_divergent_or_in_loop(ctx);
|
||||
ctx->program->needs_exact = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -10128,7 +10118,6 @@ visit_undef(isel_context* ctx, nir_undef_instr* instr)
|
|||
void
|
||||
begin_loop(isel_context* ctx, loop_context* lc)
|
||||
{
|
||||
// TODO: we might want to wrap the loop around a branch if exec.potentially_empty=true
|
||||
append_logical_end(ctx->block);
|
||||
ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform;
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
|
@ -10338,6 +10327,8 @@ emit_loop_continue(isel_context* ctx)
|
|||
void
|
||||
visit_jump(isel_context* ctx, nir_jump_instr* instr)
|
||||
{
|
||||
end_empty_exec_skip(ctx);
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_jump_break: emit_loop_break(ctx); break;
|
||||
case nir_jump_continue: emit_loop_continue(ctx); break;
|
||||
|
|
@ -10380,6 +10371,12 @@ visit_block(isel_context* ctx, nir_block* block)
|
|||
ctx->unended_linear_vgprs.clear();
|
||||
}
|
||||
|
||||
nir_foreach_phi (instr, block)
|
||||
visit_phi(ctx, instr);
|
||||
|
||||
nir_phi_instr* last_phi = nir_block_last_phi_instr(block);
|
||||
begin_empty_exec_skip(ctx, last_phi ? &last_phi->instr : NULL, block);
|
||||
|
||||
ctx->block->instructions.reserve(ctx->block->instructions.size() +
|
||||
exec_list_length(&block->instr_list) * 2);
|
||||
nir_foreach_instr (instr, block) {
|
||||
|
|
@ -10388,7 +10385,7 @@ visit_block(isel_context* ctx, nir_block* block)
|
|||
case nir_instr_type_load_const: visit_load_const(ctx, nir_instr_as_load_const(instr)); break;
|
||||
case nir_instr_type_intrinsic: visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); break;
|
||||
case nir_instr_type_tex: visit_tex(ctx, nir_instr_as_tex(instr)); break;
|
||||
case nir_instr_type_phi: visit_phi(ctx, nir_instr_as_phi(instr)); break;
|
||||
case nir_instr_type_phi: break;
|
||||
case nir_instr_type_undef: visit_undef(ctx, nir_instr_as_undef(instr)); break;
|
||||
case nir_instr_type_deref: break;
|
||||
case nir_instr_type_jump: visit_jump(ctx, nir_instr_as_jump(instr)); break;
|
||||
|
|
@ -10399,8 +10396,8 @@ visit_block(isel_context* ctx, nir_block* block)
|
|||
}
|
||||
|
||||
static void begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond);
|
||||
static void begin_uniform_if_else(isel_context* ctx, if_context* ic);
|
||||
static void end_uniform_if(isel_context* ctx, if_context* ic);
|
||||
static void begin_uniform_if_else(isel_context* ctx, if_context* ic, bool logical_else = true);
|
||||
static void end_uniform_if(isel_context* ctx, if_context* ic, bool logical_else = true);
|
||||
|
||||
static void
|
||||
visit_loop(isel_context* ctx, nir_loop* loop)
|
||||
|
|
@ -10418,8 +10415,6 @@ static void
|
|||
begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond,
|
||||
nir_selection_control sel_ctrl = nir_selection_control_none)
|
||||
{
|
||||
ic->cond = cond;
|
||||
|
||||
append_logical_end(ctx->block);
|
||||
ctx->block->kind |= block_kind_branch;
|
||||
|
||||
|
|
@ -10568,7 +10563,9 @@ end_divergent_if(isel_context* ctx, if_context* ic)
|
|||
static void
|
||||
begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
|
||||
{
|
||||
assert(cond.regClass() == s1);
|
||||
assert(!cond.id() || cond.regClass() == s1);
|
||||
|
||||
ic->cond = cond;
|
||||
|
||||
append_logical_end(ctx->block);
|
||||
ctx->block->kind |= block_kind_uniform;
|
||||
|
|
@ -10577,8 +10574,13 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
|
|||
aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
|
||||
branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
branch->operands[0] = Operand(cond);
|
||||
branch->operands[0].setPrecolored(scc);
|
||||
if (cond.id()) {
|
||||
branch->operands[0] = Operand(cond);
|
||||
branch->operands[0].setPrecolored(scc);
|
||||
} else {
|
||||
branch->operands[0] = Operand(exec, ctx->program->lane_mask);
|
||||
branch->branch().rarely_taken = true;
|
||||
}
|
||||
ctx->block->instructions.emplace_back(std::move(branch));
|
||||
|
||||
ic->BB_if_idx = ctx->block->index;
|
||||
|
|
@ -10592,7 +10594,8 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
|
|||
ic->has_divergent_continue_old = ctx->cf_info.parent_loop.has_divergent_continue;
|
||||
|
||||
/** emit then block */
|
||||
ctx->program->next_uniform_if_depth++;
|
||||
if (ic->cond.id())
|
||||
ctx->program->next_uniform_if_depth++;
|
||||
Block* BB_then = ctx->program->create_and_insert_block();
|
||||
add_edge(ic->BB_if_idx, BB_then);
|
||||
append_logical_start(BB_then);
|
||||
|
|
@ -10600,7 +10603,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
|
|||
}
|
||||
|
||||
static void
|
||||
begin_uniform_if_else(isel_context* ctx, if_context* ic)
|
||||
begin_uniform_if_else(isel_context* ctx, if_context* ic, bool logical_else)
|
||||
{
|
||||
Block* BB_then = ctx->block;
|
||||
|
||||
|
|
@ -10628,25 +10631,30 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic)
|
|||
|
||||
/** emit else block */
|
||||
Block* BB_else = ctx->program->create_and_insert_block();
|
||||
add_edge(ic->BB_if_idx, BB_else);
|
||||
append_logical_start(BB_else);
|
||||
if (logical_else) {
|
||||
add_edge(ic->BB_if_idx, BB_else);
|
||||
append_logical_start(BB_else);
|
||||
} else {
|
||||
add_linear_edge(ic->BB_if_idx, BB_else);
|
||||
}
|
||||
ctx->block = BB_else;
|
||||
}
|
||||
|
||||
static void
|
||||
end_uniform_if(isel_context* ctx, if_context* ic)
|
||||
end_uniform_if(isel_context* ctx, if_context* ic, bool logical_else)
|
||||
{
|
||||
Block* BB_else = ctx->block;
|
||||
|
||||
if (!ctx->cf_info.has_branch) {
|
||||
append_logical_end(BB_else);
|
||||
if (logical_else)
|
||||
append_logical_end(BB_else);
|
||||
/* branch from then block to endif block */
|
||||
aco_ptr<Instruction> branch;
|
||||
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
|
||||
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
|
||||
BB_else->instructions.emplace_back(std::move(branch));
|
||||
add_linear_edge(BB_else->index, &ic->BB_endif);
|
||||
if (!ctx->cf_info.parent_loop.has_divergent_branch)
|
||||
if (logical_else && !ctx->cf_info.parent_loop.has_divergent_branch)
|
||||
add_logical_edge(BB_else->index, &ic->BB_endif);
|
||||
BB_else->kind |= block_kind_uniform;
|
||||
}
|
||||
|
|
@ -10657,7 +10665,8 @@ end_uniform_if(isel_context* ctx, if_context* ic)
|
|||
ctx->cf_info.parent_loop.has_divergent_continue |= ic->has_divergent_continue_then;
|
||||
|
||||
/** emit endif merge block */
|
||||
ctx->program->next_uniform_if_depth--;
|
||||
if (ic->cond.id())
|
||||
ctx->program->next_uniform_if_depth--;
|
||||
ctx->block = ctx->program->insert_block(std::move(ic->BB_endif));
|
||||
append_logical_start(ctx->block);
|
||||
|
||||
|
|
@ -10665,6 +10674,74 @@ end_uniform_if(isel_context* ctx, if_context* ic)
|
|||
assert(!ctx->block->logical_preds.empty());
|
||||
}
|
||||
|
||||
static void
|
||||
end_empty_exec_skip(isel_context* ctx)
|
||||
{
|
||||
if (ctx->cf_info.skipping_empty_exec) {
|
||||
begin_uniform_if_else(ctx, &ctx->cf_info.empty_exec_skip, false);
|
||||
end_uniform_if(ctx, &ctx->cf_info.empty_exec_skip, false);
|
||||
ctx->cf_info.skipping_empty_exec = false;
|
||||
|
||||
ctx->cf_info.exec.combine(ctx->cf_info.empty_exec_skip.exec_old);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If necessary, begin a branch which skips over instructions if exec is empty.
|
||||
*
|
||||
* The linear CFG:
|
||||
* BB_IF
|
||||
* / \
|
||||
* BB_THEN (logical) BB_ELSE (linear)
|
||||
* \ /
|
||||
* BB_ENDIF
|
||||
*
|
||||
* The logical CFG:
|
||||
* BB_IF
|
||||
* |
|
||||
* BB_THEN (logical)
|
||||
* |
|
||||
* BB_ENDIF
|
||||
*
|
||||
* BB_THEN should not end with a branch, since that would make BB_ENDIF unreachable.
|
||||
*/
|
||||
static void
|
||||
begin_empty_exec_skip(isel_context* ctx, nir_instr* after_instr, nir_block* block)
|
||||
{
|
||||
if (!ctx->cf_info.exec.potentially_empty_discard && !ctx->cf_info.exec.potentially_empty_break &&
|
||||
!ctx->cf_info.exec.potentially_empty_continue)
|
||||
return;
|
||||
|
||||
assert(!(ctx->block->kind & block_kind_top_level));
|
||||
|
||||
bool further_cf_empty = !nir_cf_node_next(&block->cf_node);
|
||||
|
||||
bool rest_of_block_empty = false;
|
||||
if (after_instr) {
|
||||
rest_of_block_empty =
|
||||
nir_instr_is_last(after_instr) || nir_instr_next(after_instr)->type == nir_instr_type_jump;
|
||||
} else {
|
||||
rest_of_block_empty = exec_list_is_empty(&block->instr_list) ||
|
||||
nir_block_first_instr(block)->type == nir_instr_type_jump;
|
||||
}
|
||||
|
||||
assert(!(ctx->block->kind & block_kind_export_end) || rest_of_block_empty);
|
||||
|
||||
if (rest_of_block_empty && further_cf_empty)
|
||||
return;
|
||||
|
||||
/* Don't nest these skipping branches. It is not worth the complexity. */
|
||||
end_empty_exec_skip(ctx);
|
||||
|
||||
begin_uniform_if_then(ctx, &ctx->cf_info.empty_exec_skip, Temp());
|
||||
ctx->cf_info.skipping_empty_exec = true;
|
||||
|
||||
ctx->cf_info.empty_exec_skip.exec_old = ctx->cf_info.exec;
|
||||
ctx->cf_info.exec = exec_info();
|
||||
|
||||
ctx->program->should_repair_ssa = true;
|
||||
}
|
||||
|
||||
static void
|
||||
visit_if(isel_context* ctx, nir_if* if_stmt)
|
||||
{
|
||||
|
|
@ -10739,6 +10816,13 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
|
|||
static void
|
||||
visit_cf_list(isel_context* ctx, struct exec_list* list)
|
||||
{
|
||||
if (nir_cf_list_is_empty_block(list))
|
||||
return;
|
||||
|
||||
bool skipping_empty_exec_old = ctx->cf_info.skipping_empty_exec;
|
||||
if_context empty_exec_skip_old = std::move(ctx->cf_info.empty_exec_skip);
|
||||
ctx->cf_info.skipping_empty_exec = false;
|
||||
|
||||
foreach_list_typed (nir_cf_node, node, node, list) {
|
||||
switch (node->type) {
|
||||
case nir_cf_node_block: visit_block(ctx, nir_cf_node_as_block(node)); break;
|
||||
|
|
@ -10747,6 +10831,10 @@ visit_cf_list(isel_context* ctx, struct exec_list* list)
|
|||
default: unreachable("unimplemented cf list type");
|
||||
}
|
||||
}
|
||||
|
||||
end_empty_exec_skip(ctx);
|
||||
ctx->cf_info.skipping_empty_exec = skipping_empty_exec_old;
|
||||
ctx->cf_info.empty_exec_skip = std::move(empty_exec_skip_old);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -60,6 +60,22 @@ struct exec_info {
|
|||
}
|
||||
};
|
||||
|
||||
struct if_context {
|
||||
Temp cond;
|
||||
|
||||
bool divergent_old;
|
||||
bool had_divergent_discard_old;
|
||||
bool had_divergent_discard_then;
|
||||
bool has_divergent_continue_old;
|
||||
bool has_divergent_continue_then;
|
||||
struct exec_info exec_old;
|
||||
|
||||
unsigned BB_if_idx;
|
||||
unsigned invert_idx;
|
||||
Block BB_invert;
|
||||
Block BB_endif;
|
||||
};
|
||||
|
||||
struct isel_context {
|
||||
const struct aco_compiler_options* options;
|
||||
const struct ac_shader_args* args;
|
||||
|
|
@ -85,6 +101,9 @@ struct isel_context {
|
|||
bool had_divergent_discard = false;
|
||||
|
||||
struct exec_info exec;
|
||||
|
||||
bool skipping_empty_exec = false;
|
||||
if_context empty_exec_skip;
|
||||
} cf_info;
|
||||
|
||||
/* NIR range analysis. */
|
||||
|
|
|
|||
|
|
@ -772,7 +772,10 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use)
|
|||
{
|
||||
/* The contents of this branch is moved to the merge block. */
|
||||
//>> BB14
|
||||
//! /* logical preds: BB13, / linear preds: BB12, BB13, / kind: uniform, continue, */
|
||||
//! /* logical preds: BB13, / linear preds: BB12, BB13, / kind: uniform, */
|
||||
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
|
||||
//! BB15
|
||||
//! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */
|
||||
//! p_logical_start
|
||||
//! s1: %val = p_unit_test 0
|
||||
val = nir_unit_test_uniform_amd(nb, 1, 32, .base=0);
|
||||
|
|
@ -781,6 +784,9 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use)
|
|||
|
||||
//! p_unit_test 1, %val
|
||||
nir_unit_test_amd(nb, val, .base=1);
|
||||
|
||||
//>> BB17
|
||||
//! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, continue, */
|
||||
}
|
||||
nir_pop_loop(nb, NULL);
|
||||
|
||||
|
|
@ -944,8 +950,8 @@ END_TEST
|
|||
/**
|
||||
* loop {
|
||||
* use(phi(, a))
|
||||
* terminate_if
|
||||
* loop {
|
||||
* terminate_if
|
||||
* if (uniform) {
|
||||
* a = loop_invariant_sgpr
|
||||
* break
|
||||
|
|
@ -967,7 +973,7 @@ BEGIN_TEST(isel.cf.hidden_break_no_lcssa_header_phi)
|
|||
nir_loop *loop = nir_push_loop(nb);
|
||||
{
|
||||
//>> BB1
|
||||
//! /* logical preds: BB0, BB11, / linear preds: BB0, BB13, / kind: uniform, loop-preheader, loop-header, discard, */
|
||||
//! /* logical preds: BB0, BB17, / linear preds: BB0, BB19, / kind: uniform, loop-preheader, loop-header, */
|
||||
//! s1: %phi = p_linear_phi %init, %val_lcssa
|
||||
//! p_logical_start
|
||||
//! p_unit_test 1, %phi
|
||||
|
|
@ -976,31 +982,31 @@ BEGIN_TEST(isel.cf.hidden_break_no_lcssa_header_phi)
|
|||
nir_phi_instr_add_src(phi, init->parent_instr->block, init);
|
||||
nir_unit_test_amd(nb, &phi->def, .base = 1);
|
||||
|
||||
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
|
||||
|
||||
//>> BB2
|
||||
//! /* logical preds: BB1, BB5, / linear preds: BB1, BB7, / kind: uniform, loop-header, */
|
||||
//! /* logical preds: BB1, BB8, / linear preds: BB1, BB10, / kind: uniform, loop-header, discard, */
|
||||
nir_push_loop(nb);
|
||||
{
|
||||
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
|
||||
|
||||
nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 3));
|
||||
{
|
||||
//>> BB3
|
||||
//! /* logical preds: BB2, / linear preds: BB2, / kind: uniform, break, */
|
||||
//>> BB4
|
||||
//! /* logical preds: BB3, / linear preds: BB3, / kind: uniform, break, */
|
||||
//! p_logical_start
|
||||
//! s1: %val = p_parallelcopy 0
|
||||
val = nir_imm_zero(nb, 1, 32);
|
||||
nir_jump(nb, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(nb, NULL);
|
||||
//>> BB5
|
||||
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, continue_or_break, */
|
||||
//>> BB8
|
||||
//! /* logical preds: BB6, / linear preds: BB6, BB7, / kind: uniform, continue_or_break, */
|
||||
}
|
||||
nir_pop_loop(nb, NULL);
|
||||
//>> BB8
|
||||
//! /* logical preds: BB3, / linear preds: BB3, BB6, / kind: uniform, loop-exit, */
|
||||
//! s1: %val_lcssa = p_linear_phi %val, s1: undef
|
||||
//>> BB11
|
||||
//! /* logical preds: BB10, / linear preds: BB10, / kind: uniform, continue_or_break, */
|
||||
//! /* logical preds: BB4, / linear preds: BB4, BB9, / kind: uniform, loop-exit, */
|
||||
//! s1: %val_lcssa = p_linear_phi %val, s1: undef
|
||||
//>> BB17
|
||||
//! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, continue_or_break, */
|
||||
|
||||
nir_phi_instr_add_src(phi, nir_cursor_current_block(nb->cursor), val);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue