From 61fa007e4842f8e0e7c777d4fdb23b3c7118d48f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Fri, 24 Jan 2025 10:05:34 +0100 Subject: [PATCH] aco/isel: fix empty exec tracking for uniform branches Totals from 5 (0.01% of 79395) affected shaders: (Navi31) Instrs: 54730 -> 54715 (-0.03%) CodeSize: 276928 -> 276852 (-0.03%) Latency: 215212 -> 214874 (-0.16%) InvThroughput: 40154 -> 40150 (-0.01%) Copies: 6824 -> 6821 (-0.04%); split: -0.06%, +0.01% Branches: 1625 -> 1615 (-0.62%) SALU: 5682 -> 5678 (-0.07%) Part-of: --- .../compiler/aco_instruction_selection.cpp | 12 ++-- src/amd/compiler/tests/test_isel.cpp | 55 +++++++++++++++++++ 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 285c3f0c5e6..120675adea3 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10186,12 +10186,11 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond) ic->BB_if_idx = ctx->block->index; ic->BB_endif = Block(); ic->BB_endif.kind |= ctx->block->kind & block_kind_top_level; - - ctx->cf_info.has_branch = false; - ctx->cf_info.parent_loop.has_divergent_branch = false; + assert(!ctx->cf_info.has_branch && !ctx->cf_info.parent_loop.has_divergent_branch); ic->had_divergent_discard_old = ctx->cf_info.had_divergent_discard; ic->has_divergent_continue_old = ctx->cf_info.parent_loop.has_divergent_continue; + ic->exec_old = ctx->cf_info.exec; /** emit then block */ if (ic->cond.id()) @@ -10228,6 +10227,8 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic, bool logical_else) ic->has_divergent_continue_then = ctx->cf_info.parent_loop.has_divergent_continue; ctx->cf_info.parent_loop.has_divergent_continue = ic->has_divergent_continue_old; + std::swap(ctx->cf_info.exec, ic->exec_old); + /** emit else block */ Block* BB_else = ctx->program->create_and_insert_block(); if (logical_else) { @@ -10261,6 +10262,7 @@ end_uniform_if(isel_context* ctx, if_context* ic, bool logical_else) ctx->cf_info.parent_loop.has_divergent_branch = false; ctx->cf_info.had_divergent_discard |= ic->had_divergent_discard_then; ctx->cf_info.parent_loop.has_divergent_continue |= ic->has_divergent_continue_then; + ctx->cf_info.exec.combine(ic->exec_old); /** emit endif merge block */ if (ic->cond.id()) @@ -10279,8 +10281,6 @@ end_empty_exec_skip(isel_context* ctx) begin_uniform_if_else(ctx, &ctx->cf_info.empty_exec_skip, false); end_uniform_if(ctx, &ctx->cf_info.empty_exec_skip, false); ctx->cf_info.skipping_empty_exec = false; - - ctx->cf_info.exec.combine(ctx->cf_info.empty_exec_skip.exec_old); } } @@ -10333,8 +10333,6 @@ begin_empty_exec_skip(isel_context* ctx, nir_instr* after_instr, nir_block* bloc begin_uniform_if_then(ctx, &ctx->cf_info.empty_exec_skip, Temp()); ctx->cf_info.skipping_empty_exec = true; - - ctx->cf_info.empty_exec_skip.exec_old = ctx->cf_info.exec; ctx->cf_info.exec = exec_info(); ctx->program->should_repair_ssa = true; diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index c6514a01fee..7322f253df8 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -1185,6 +1185,61 @@ BEGIN_TEST(isel.cf.empty_exec.uniform_if) finish_isel_test(); END_TEST +/* + * if (divergent) { + * if (uniform) { + * terminate_if + * // exec potentially empty + * } else { + * } + * // exec potentially empty + * } + */ +BEGIN_TEST(isel.cf.empty_exec.nested_uniform_if) + if (!setup_nir_cs(GFX11)) + return; + + //>> BB0 + //>> s2: %_ = p_unit_test 0 + //>> p_cbranch_z %_ + nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); + { + //>> BB1 + //>> s2: %_ = p_unit_test 1 + //>> p_cbranch_z %_:scc + nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 1)); + { + //>> BB2 + //>> s2: %_ = p_unit_test 2 + //>> p_discard_if %_ + nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); + + //>> p_cbranch_z %0:exec rarely_taken + //>> p_unit_test 3, %_ + nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 3); + } + nir_push_else(nb, NULL); + { + //>> BB6 + //>> p_unit_test 4, %_ + nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 4); + } + nir_pop_if(nb, NULL); + + //>> BB7 + //>> p_cbranch_z %0:exec rarely_taken + //>> p_unit_test 5, %_ + nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 5); + } + nir_pop_if(nb, NULL); + //>> BB15 + //! /* logical preds: BB10, BB13, / linear preds: BB13, BB14, / kind: uniform, top-level, merge, */ + //>> p_unit_test 6, %_ + nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 6); + + finish_isel_test(); +END_TEST + /* * if (divergent) { * terminate_if