nir: remove is_only_uniform_src() restriction

Loop analysis seems to have assumed that an induction variable's initial
value had to be a const for the loop to be useful. However, this isn't
true, so drop the restriction.

This allows the optimisation from 6ca81adffc to become more powerful.

Shader-db results for radeonsi:

TOTALS FROM AFFECTED SHADERS (19/168079)
  SGPRS: 904.00 -> 848.00 (-6.19 %)
  VGPRS: 712.00 -> 684.00 (-3.93 %)
  Spilled SGPRs: 0.00 -> 0.00 (0.00 %)
  Spilled VGPRs: 0.00 -> 0.00 (0.00 %)
  Private memory VGPRs: 0.00 -> 0.00 (0.00 %)
  Scratch size: 0.00 -> 0.00 (0.00 %) dwords per thread
  Code Size: 80340.00 -> 92980.00 (15.73 %) bytes
  Max Waves: 236.00 -> 238.00 (0.85 %)
  Outputs: 0.00 -> 0.00 (0.00 %)
  Patch Outputs: 0.00 -> 0.00 (0.00 %)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32473>
This commit is contained in:
Timothy Arceri 2024-12-03 17:05:58 +11:00 committed by Marge Bot
parent 6b26cc2df3
commit 82b474c3fb
2 changed files with 61 additions and 41 deletions

View file

@ -292,8 +292,7 @@ compute_induction_information(loop_info_state *state)
}
}
if (var.update_src && var.init_src &&
is_only_uniform_src(var.init_src)) {
if (var.update_src && var.init_src) {
/* Insert induction variable into hash table. */
struct hash_table *vars = state->loop->info->induction_vars;
nir_loop_induction_variable *induction_var = ralloc(vars, nir_loop_induction_variable);
@ -1246,8 +1245,11 @@ find_trip_count(loop_info_state *state, unsigned execution_mode,
* Try to find one.
*/
if ((!nir_scalar_is_const(initial_s) && !can_find_max_trip_count) ||
!nir_scalar_is_const(alu_s))
!nir_scalar_is_const(alu_s)) {
trip_count_known = false;
terminator->exact_trip_count_unknown = true;
continue;
}
nir_const_value initial_val;
if (nir_scalar_is_const(initial_s))

View file

@ -62,6 +62,7 @@ struct loop_builder_param {
nir_def *,
nir_def *);
bool use_unknown_init_value;
bool use_uniform_unknown_init_value;
bool invert_exit_condition_and_continue_branch;
};
@ -80,9 +81,14 @@ loop_builder(nir_builder *b, loop_builder_param p)
*/
nir_def *ssa_0;
if (p.use_unknown_init_value) {
nir_def *one = nir_imm_int(b, 1);
nir_def *twelve = nir_imm_int(b, 12);
ssa_0 = nir_load_ubo(b, 1, 32, one, twelve, (gl_access_qualifier)0, 0, 0, 0, 16);
if (p.use_uniform_unknown_init_value) {
nir_def *one = nir_imm_int(b, 1);
nir_def *twelve = nir_imm_int(b, 12);
ssa_0 = nir_load_ubo(b, 1, 32, one, twelve, (gl_access_qualifier)0, 0, 0, 0, 16);
} else {
nir_def *zero = nir_imm_int(b, 0);
ssa_0 = nir_load_input(b, 1, 32, zero);
}
} else
ssa_0 = nir_imm_int(b, p.init_value);
@ -364,39 +370,40 @@ INOT_COMPARE(ilt_imin_rev)
} \
}
#define INEXACT_COUNT_TEST_UNKNOWN_INIT(_cond_value, _incr_value, cond, incr, count, invert) \
TEST_F(nir_loop_analyze_test, incr##_##cond##_inexact_count_##count##_invert_##invert) \
{ \
nir_loop *loop = \
loop_builder(&b, { .init_value = 0, \
.cond_value = _cond_value, \
.incr_value = _incr_value, \
.cond_instr = nir_##cond, \
.incr_instr = nir_##incr, \
.use_unknown_init_value = true, \
.invert_exit_condition_and_continue_branch = invert }); \
\
nir_validate_shader(b.shader, "input"); \
\
nir_loop_analyze_impl(b.impl, nir_var_all, false); \
\
ASSERT_NE((void *)0, loop->info); \
EXPECT_NE((void *)0, loop->info->limiting_terminator); \
EXPECT_EQ(count, loop->info->max_trip_count); \
EXPECT_FALSE(loop->info->exact_trip_count_known); \
\
ASSERT_NE((void *)0, loop->info->induction_vars); \
EXPECT_EQ(2, _mesa_hash_table_num_entries(loop->info->induction_vars)); \
\
hash_table_foreach(loop->info->induction_vars, entry) { \
nir_loop_induction_variable *ivar = (nir_loop_induction_variable *)entry->data; \
EXPECT_NE((void *)0, ivar->basis); \
EXPECT_NE((void *)0, ivar->def); \
ASSERT_NE((void *)0, ivar->init_src); \
EXPECT_FALSE(nir_src_is_const(*ivar->init_src)); \
ASSERT_NE((void *)0, ivar->update_src); \
EXPECT_TRUE(nir_src_is_const(ivar->update_src->src)); \
} \
#define INEXACT_COUNT_TEST_UNKNOWN_INIT(_cond_value, _incr_value, cond, incr, count, invert, uni_init) \
TEST_F(nir_loop_analyze_test, incr##_##cond##_inexact_count_##count##_invert_##invert##_uniform_init_##uni_init)\
{ \
nir_loop *loop = \
loop_builder(&b, { .init_value = 0, \
.cond_value = _cond_value, \
.incr_value = _incr_value, \
.cond_instr = nir_##cond, \
.incr_instr = nir_##incr, \
.use_unknown_init_value = true, \
.use_uniform_unknown_init_value = uni_init, \
.invert_exit_condition_and_continue_branch = invert }); \
\
nir_validate_shader(b.shader, "input"); \
\
nir_loop_analyze_impl(b.impl, nir_var_all, false); \
\
ASSERT_NE((void *)0, loop->info); \
EXPECT_NE((void *)0, loop->info->limiting_terminator); \
EXPECT_EQ(count, loop->info->max_trip_count); \
EXPECT_FALSE(loop->info->exact_trip_count_known); \
\
ASSERT_NE((void *)0, loop->info->induction_vars); \
EXPECT_EQ(2, _mesa_hash_table_num_entries(loop->info->induction_vars)); \
\
hash_table_foreach(loop->info->induction_vars, entry) { \
nir_loop_induction_variable *ivar = (nir_loop_induction_variable *)entry->data; \
EXPECT_NE((void *)0, ivar->basis); \
EXPECT_NE((void *)0, ivar->def); \
ASSERT_NE((void *)0, ivar->init_src); \
EXPECT_FALSE(nir_src_is_const(*ivar->init_src)); \
ASSERT_NE((void *)0, ivar->update_src); \
EXPECT_TRUE(nir_src_is_const(ivar->update_src->src)); \
} \
}
#define INEXACT_COUNT_TEST(_init_value, _cond_value, _incr_value, cond, incr, count) \
@ -1704,7 +1711,7 @@ INEXACT_COUNT_TEST(0x00000001, 0x00000100, 0x00000001, uge_umin, ishl, 8)
* i += 6;
* }
*/
INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0)
INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0, 1)
/* uniform uint x;
* uint i = x;
@ -1717,4 +1724,15 @@ INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0)
* i += 6;
* }
*/
INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 1)
INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 1, 1)
/* in uint x;
* uint i = x;
* while (true) {
* if (i >= 4)
* break;
*
* i += 6;
* }
*/
INEXACT_COUNT_TEST_UNKNOWN_INIT(0x00000004, 0x00000006, uge, iadd, 1, 0, 0)