From 82b474c3fb458be7068f3ec2ffb171b8dff07552 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Tue, 3 Dec 2024 17:05:58 +1100 Subject: [PATCH] nir: remove is_only_uniform_src() restriction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Loop analysis seems to have assumed we needed a const here for the loop to be useful; however, this isn't true, so drop the restriction. This allows the optimisation from 6ca81adffc4a to become more powerful. Shader-db results radeonsi: TOTALS FROM AFFECTED SHADERS (19/168079) SGPRS: 904.00 -> 848.00 (-6.19 %) VGPRS: 712.00 -> 684.00 (-3.93 %) Spilled SGPRs: 0.00 -> 0.00 (0.00 %) Spilled VGPRs: 0.00 -> 0.00 (0.00 %) Private memory VGPRs: 0.00 -> 0.00 (0.00 %) Scratch size: 0.00 -> 0.00 (0.00 %) dwords per thread Code Size: 80340.00 -> 92980.00 (15.73 %) bytes Max Waves: 236.00 -> 238.00 (0.85 %) Outputs: 0.00 -> 0.00 (0.00 %) Patch Outputs: 0.00 -> 0.00 (0.00 %) Reviewed-by: Daniel Schürmann Part-of: --- src/compiler/nir/nir_loop_analyze.c | 8 +- src/compiler/nir/tests/loop_analyze_tests.cpp | 94 +++++++++++-------- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 099c55bffb5..49d35171dc8 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -292,8 +292,7 @@ compute_induction_information(loop_info_state *state) } } - if (var.update_src && var.init_src && - is_only_uniform_src(var.init_src)) { + if (var.update_src && var.init_src) { /* Insert induction variable into hash table. */ struct hash_table *vars = state->loop->info->induction_vars; nir_loop_induction_variable *induction_var = ralloc(vars, nir_loop_induction_variable); @@ -1246,8 +1245,11 @@ find_trip_count(loop_info_state *state, unsigned execution_mode, * Try to find one. 
*/ if ((!nir_scalar_is_const(initial_s) && !can_find_max_trip_count) || - !nir_scalar_is_const(alu_s)) + !nir_scalar_is_const(alu_s)) { + trip_count_known = false; + terminator->exact_trip_count_unknown = true; continue; + } nir_const_value initial_val; if (nir_scalar_is_const(initial_s)) diff --git a/src/compiler/nir/tests/loop_analyze_tests.cpp b/src/compiler/nir/tests/loop_analyze_tests.cpp index 25b95d3b5e7..4de9863fe70 100644 --- a/src/compiler/nir/tests/loop_analyze_tests.cpp +++ b/src/compiler/nir/tests/loop_analyze_tests.cpp @@ -62,6 +62,7 @@ struct loop_builder_param { nir_def *, nir_def *); bool use_unknown_init_value; + bool use_uniform_unknown_init_value; bool invert_exit_condition_and_continue_branch; }; @@ -80,9 +81,14 @@ loop_builder(nir_builder *b, loop_builder_param p) */ nir_def *ssa_0; if (p.use_unknown_init_value) { - nir_def *one = nir_imm_int(b, 1); - nir_def *twelve = nir_imm_int(b, 12); - ssa_0 = nir_load_ubo(b, 1, 32, one, twelve, (gl_access_qualifier)0, 0, 0, 0, 16); + if (p.use_uniform_unknown_init_value) { + nir_def *one = nir_imm_int(b, 1); + nir_def *twelve = nir_imm_int(b, 12); + ssa_0 = nir_load_ubo(b, 1, 32, one, twelve, (gl_access_qualifier)0, 0, 0, 0, 16); + } else { + nir_def *zero = nir_imm_int(b, 0); + ssa_0 = nir_load_input(b, 1, 32, zero); + } } else ssa_0 = nir_imm_int(b, p.init_value); @@ -364,39 +370,40 @@ INOT_COMPARE(ilt_imin_rev) } \ } -#define INEXACT_COUNT_TEST_UNKNOWN_INIT(_cond_value, _incr_value, cond, incr, count, invert) \ - TEST_F(nir_loop_analyze_test, incr##_##cond##_inexact_count_##count##_invert_##invert) \ - { \ - nir_loop *loop = \ - loop_builder(&b, { .init_value = 0, \ - .cond_value = _cond_value, \ - .incr_value = _incr_value, \ - .cond_instr = nir_##cond, \ - .incr_instr = nir_##incr, \ - .use_unknown_init_value = true, \ - .invert_exit_condition_and_continue_branch = invert }); \ - \ - nir_validate_shader(b.shader, "input"); \ - \ - nir_loop_analyze_impl(b.impl, nir_var_all, false); \ - \ - 
ASSERT_NE((void *)0, loop->info); \ - EXPECT_NE((void *)0, loop->info->limiting_terminator); \ - EXPECT_EQ(count, loop->info->max_trip_count); \ - EXPECT_FALSE(loop->info->exact_trip_count_known); \ - \ - ASSERT_NE((void *)0, loop->info->induction_vars); \ - EXPECT_EQ(2, _mesa_hash_table_num_entries(loop->info->induction_vars)); \ - \ - hash_table_foreach(loop->info->induction_vars, entry) { \ - nir_loop_induction_variable *ivar = (nir_loop_induction_variable *)entry->data; \ - EXPECT_NE((void *)0, ivar->basis); \ - EXPECT_NE((void *)0, ivar->def); \ - ASSERT_NE((void *)0, ivar->init_src); \ - EXPECT_FALSE(nir_src_is_const(*ivar->init_src)); \ - ASSERT_NE((void *)0, ivar->update_src); \ - EXPECT_TRUE(nir_src_is_const(ivar->update_src->src)); \ - } \ +#define INEXACT_COUNT_TEST_UNKNOWN_INIT(_cond_value, _incr_value, cond, incr, count, invert, uni_init) \ + TEST_F(nir_loop_analyze_test, incr##_##cond##_inexact_count_##count##_invert_##invert##_uniform_init_##uni_init)\ + { \ + nir_loop *loop = \ + loop_builder(&b, { .init_value = 0, \ + .cond_value = _cond_value, \ + .incr_value = _incr_value, \ + .cond_instr = nir_##cond, \ + .incr_instr = nir_##incr, \ + .use_unknown_init_value = true, \ + .use_uniform_unknown_init_value = uni_init, \ + .invert_exit_condition_and_continue_branch = invert }); \ + \ + nir_validate_shader(b.shader, "input"); \ + \ + nir_loop_analyze_impl(b.impl, nir_var_all, false); \ + \ + ASSERT_NE((void *)0, loop->info); \ + EXPECT_NE((void *)0, loop->info->limiting_terminator); \ + EXPECT_EQ(count, loop->info->max_trip_count); \ + EXPECT_FALSE(loop->info->exact_trip_count_known); \ + \ + ASSERT_NE((void *)0, loop->info->induction_vars); \ + EXPECT_EQ(2, _mesa_hash_table_num_entries(loop->info->induction_vars)); \ + \ + hash_table_foreach(loop->info->induction_vars, entry) { \ + nir_loop_induction_variable *ivar = (nir_loop_induction_variable *)entry->data; \ + EXPECT_NE((void *)0, ivar->basis); \ + EXPECT_NE((void *)0, ivar->def); \ + 
ASSERT_NE((void *)0, ivar->init_src); \ + EXPECT_FALSE(nir_src_is_const(*ivar->init_src)); \ + ASSERT_NE((void *)0, ivar->update_src); \ + EXPECT_TRUE(nir_src_is_const(ivar->update_src->src)); \ + } \ } #define INEXACT_COUNT_TEST(_init_value, _cond_value, _incr_value, cond, incr, count) \ @@ -1704,7 +1711,7 @@ INEXACT_COUNT_TEST(0x00000001, 0x00000100, 0x00000001, uge_umin, ishl, 8) * i += 6; * } */ -INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0) +INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0, 1) /* uniform uint x; * uint i = x; @@ -1717,4 +1724,15 @@ INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0) * i += 6; * } */ -INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 1) +INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 1, 1) + +/* in uint x; + * uint i = x; + * while (true) { + * if (i >= 4) + * break; + * + * i += 6; + * } + */ +INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0, 0)