mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 18:38:11 +02:00
nir/lower_indirect_derefs: Add a threshold
Instead of always lowering everything, we add a threshold such that if the total indirected array size (AoA size) is above that threshold, the pass won't lower that access. It's assumed that the driver will sort things out somehow by, for instance, lowering to scratch. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5909>
This commit is contained in:
parent
c897cd0278
commit
38a83a3048
7 changed files with 30 additions and 17 deletions
|
|
@ -5571,7 +5571,7 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
|
|||
*/
|
||||
indirect_mask |= nir_var_function_temp;
|
||||
|
||||
progress |= nir_lower_indirect_derefs(nir, indirect_mask);
|
||||
progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4076,7 +4076,8 @@ typedef enum {
|
|||
bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
|
||||
nir_lower_array_deref_of_vec_options options);
|
||||
|
||||
bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes);
|
||||
bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
|
||||
uint32_t max_lower_array_len);
|
||||
|
||||
bool nir_lower_locals_to_regs(nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,8 @@ emit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
|
|||
|
||||
static bool
|
||||
lower_indirect_derefs_block(nir_block *block, nir_builder *b,
|
||||
nir_variable_mode modes)
|
||||
nir_variable_mode modes,
|
||||
uint32_t max_lower_array_len)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
|
|
@ -133,17 +134,21 @@ lower_indirect_derefs_block(nir_block *block, nir_builder *b,
|
|||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
|
||||
/* Walk the deref chain back to the base and look for indirects */
|
||||
uint32_t indirect_array_len = 1;
|
||||
bool has_indirect = false;
|
||||
nir_deref_instr *base = deref;
|
||||
while (base && base->deref_type != nir_deref_type_var) {
|
||||
nir_deref_instr *parent = nir_deref_instr_parent(base);
|
||||
if (base->deref_type == nir_deref_type_array &&
|
||||
!nir_src_is_const(base->arr.index))
|
||||
!nir_src_is_const(base->arr.index)) {
|
||||
indirect_array_len *= glsl_get_length(parent->type);
|
||||
has_indirect = true;
|
||||
}
|
||||
|
||||
base = nir_deref_instr_parent(base);
|
||||
base = parent;
|
||||
}
|
||||
|
||||
if (!has_indirect || !base)
|
||||
if (!has_indirect || !base || indirect_array_len > max_lower_array_len)
|
||||
continue;
|
||||
|
||||
/* Only lower variables whose mode is in the mask, or compact
|
||||
|
|
@ -179,14 +184,16 @@ lower_indirect_derefs_block(nir_block *block, nir_builder *b,
|
|||
}
|
||||
|
||||
static bool
|
||||
lower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes)
|
||||
lower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes,
|
||||
uint32_t max_lower_array_len)
|
||||
{
|
||||
nir_builder builder;
|
||||
nir_builder_init(&builder, impl);
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_block_safe(block, impl) {
|
||||
progress |= lower_indirect_derefs_block(block, &builder, modes);
|
||||
progress |= lower_indirect_derefs_block(block, &builder, modes,
|
||||
max_lower_array_len);
|
||||
}
|
||||
|
||||
if (progress)
|
||||
|
|
@ -203,13 +210,16 @@ lower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes)
|
|||
* that does a binary search on the array index.
|
||||
*/
|
||||
bool
|
||||
nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes)
|
||||
nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
|
||||
uint32_t max_lower_array_len)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl)
|
||||
progress = lower_indirects_impl(function->impl, modes) || progress;
|
||||
if (function->impl) {
|
||||
progress = lower_indirects_impl(function->impl, modes,
|
||||
max_lower_array_len) || progress;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
|
|
|
|||
|
|
@ -757,7 +757,7 @@ tu_shader_create(struct tu_device *dev,
|
|||
* a global BO, they can be directly accessed via stg and ldg.
|
||||
* nir_lower_indirect_derefs will instead generate a big if-ladder which
|
||||
* isn't *incorrect* but is much less efficient. */
|
||||
NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);
|
||||
NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
|
||||
|
||||
|
|
|
|||
|
|
@ -1093,7 +1093,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v)
|
|||
|
||||
NIR_PASS_V(s, nir_lower_regs_to_ssa);
|
||||
NIR_PASS_V(s, nir_lower_vars_to_ssa);
|
||||
NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all);
|
||||
NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX);
|
||||
NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
|
||||
NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ ir2_optimize_nir(nir_shader *s, bool lower)
|
|||
|
||||
OPT_V(s, nir_lower_regs_to_ssa);
|
||||
OPT_V(s, nir_lower_vars_to_ssa);
|
||||
OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);
|
||||
OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
||||
|
||||
if (lower) {
|
||||
OPT_V(s, ir3_nir_apply_trig_workarounds);
|
||||
|
|
|
|||
|
|
@ -763,7 +763,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||
|
||||
nir_variable_mode indirect_mask =
|
||||
brw_nir_no_indirect_mask(compiler, nir->info.stage);
|
||||
OPT(nir_lower_indirect_derefs, indirect_mask);
|
||||
OPT(nir_lower_indirect_derefs, indirect_mask, UINT32_MAX);
|
||||
|
||||
/* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and
|
||||
* SSBOs, our back-end is capable of loading an entire vec4 at a time and
|
||||
|
|
@ -813,9 +813,11 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
|
|||
* varyings we have demoted here.
|
||||
*/
|
||||
NIR_PASS_V(producer, nir_lower_indirect_derefs,
|
||||
brw_nir_no_indirect_mask(compiler, producer->info.stage));
|
||||
brw_nir_no_indirect_mask(compiler, producer->info.stage),
|
||||
UINT32_MAX);
|
||||
NIR_PASS_V(consumer, nir_lower_indirect_derefs,
|
||||
brw_nir_no_indirect_mask(compiler, consumer->info.stage));
|
||||
brw_nir_no_indirect_mask(compiler, consumer->info.stage),
|
||||
UINT32_MAX);
|
||||
|
||||
brw_nir_optimize(producer, compiler, p_is_scalar, false);
|
||||
brw_nir_optimize(consumer, compiler, c_is_scalar, false);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue