diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index c4606738d80..581c97a4041 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -218,6 +218,39 @@ ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct a return ac_nir_unpack_value(b, value, rshift, bitwidth); } +/* This lowers small indirect array derefs to if-else trees. We might want to do this before + * ac_nir_lower_indirect_derefs() to lower small array derefs to if-else trees earlier than + * lowering large array derefs to scratch. This is because we want to do the scratch lowering + * as late as possible (because scratch access isn't very optimizable), but the if-else tree + * lowering can be optimized. For example, an indirect access where we can know that all + * elements that might be accessed are equal could be replaced with a use of that element, or + * nir_opt_peephole_select() can flatten some of the if-else tree. */ +bool +ac_nir_lower_indirect_derefs_early(nir_shader *shader) +{ + struct set vars; + _mesa_pointer_set_init(&vars, NULL); + nir_foreach_function_impl(impl, shader) { + nir_foreach_function_temp_variable(var, impl) { + unsigned var_size, var_align; + glsl_get_natural_size_align_bytes(var->type, &var_size, &var_align); + if (var_size < 256) + _mesa_set_add(&vars, var); + } + } + + bool progress = false; + if (vars.entries) + NIR_PASS(progress, shader, nir_lower_indirect_var_derefs_to_if_else_trees, &vars); + + _mesa_set_fini(&vars, NULL); + + return progress; +} + +/* This lowers all indirect array derefs to either scratch access or if-else trees, ensuring + that none remains. 
+ */ bool ac_nir_lower_indirect_derefs(nir_shader *shader) { diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index eb785c3c84a..b7e6c313e2a 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -176,6 +176,9 @@ ac_nir_lower_gs_inputs_to_mem(nir_shader *shader, enum amd_gfx_level gfx_level, bool triangle_strip_adjacency_fix); +bool +ac_nir_lower_indirect_derefs_early(nir_shader *shader); + bool ac_nir_lower_indirect_derefs(nir_shader *shader); diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index 93223d811bd..d1b67a0c3e1 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -106,6 +106,9 @@ radv_compile_cs(const struct radv_compiler_info *compiler_info, struct radv_shad /* Compile SPIR-V shader to NIR. */ cs_stage->nir = radv_shader_spirv_to_nir(compiler_info, cs_stage, NULL, is_internal); + NIR_PASS(_, cs_stage->nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, cs_stage->nir, nir_lower_vars_to_ssa); + radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled); /* Run the shader info pass. 
*/ diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 7a61709e6ff..54ec910224d 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2611,6 +2611,9 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st radv_foreach_stage (i, active_nir_stages) { int64_t stage_start = os_time_get_nano(); + ac_nir_lower_indirect_derefs_early(stages[i].nir); + NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa); + radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled); stages[i].feedback.duration += os_time_get_nano() - stage_start; @@ -2635,6 +2638,28 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st stages[i].feedback.duration += os_time_get_nano() - stage_start; } + radv_foreach_stage (i, active_nir_stages) { + int64_t stage_start = os_time_get_nano(); + + /* Indirect lowering must be called after the radv_optimize_nir() loop + * has been called at least once. Otherwise indirect lowering can + * bloat the instruction count of the loop and cause it to be + * considered too large for unrolling. + * + * We want to do this as late as possible because scratch access isn't + * very optimizable. We lower smaller arrays to SSA earlier with + * ac_nir_lower_indirect_derefs_early, because that can actually enable + * optimizations. 
+ */ + bool indirect_derefs_lowered = false; + NIR_PASS(indirect_derefs_lowered, stages[i].nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, stages[i].nir, nir_lower_vars_to_ssa); + if (indirect_derefs_lowered && !stages[i].key.optimisations_disabled) + radv_optimize_nir(stages[i].nir, false); + + stages[i].feedback.duration += os_time_get_nano() - stage_start; + } + radv_fill_shader_info(compiler_info, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages); radv_declare_pipeline_args(compiler_info, stages, gfx_state, active_nir_stages, debug); diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index d1ab88c6aaf..4a849e688ce 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -657,6 +657,12 @@ radv_rt_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv { stage->nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, false); + bool indirect_derefs_lowered = false; + NIR_PASS(indirect_derefs_lowered, stage->nir, ac_nir_lower_indirect_derefs); + NIR_PASS(_, stage->nir, nir_lower_vars_to_ssa); + if (indirect_derefs_lowered && !stage->key.optimisations_disabled) + radv_optimize_nir(stage->nir, false); + nir_foreach_variable_with_modes (var, stage->nir, nir_var_ray_hit_attrib) { unsigned size, alignment; glsl_get_natural_size_align_bytes(var->type, &size, &alignment); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 707d167862f..533120f9b12 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -877,21 +877,6 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct } } - /* Indirect lowering must be called after the radv_optimize_nir() loop - * has been called at least once. Otherwise indirect lowering can - * bloat the instruction count of the loop and cause it to be - * considered too large for unrolling. 
- */ - bool indirect_derefs_lowered = false; - NIR_PASS(indirect_derefs_lowered, nir, ac_nir_lower_indirect_derefs); - NIR_PASS(_, nir, nir_lower_vars_to_ssa); - - if (indirect_derefs_lowered && !stage->key.optimisations_disabled && - nir->info.stage != MESA_SHADER_COMPUTE) { - /* Optimize the lowered code before the linking optimizations. */ - radv_optimize_nir(nir, false); - } - return nir; } diff --git a/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c b/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c index 2f65315c571..57b148fb1e0 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c +++ b/src/compiler/nir/nir_lower_indirect_derefs_to_if_else_trees.c @@ -250,7 +250,7 @@ nir_lower_indirect_var_derefs_to_if_else_trees(nir_shader *shader, bool progress = false; nir_foreach_function_impl(impl, shader) { - progress = lower_indirects_impl(impl, nir_var_uniform, vars, UINT_MAX) || + progress = lower_indirects_impl(impl, nir_var_all, vars, UINT_MAX) || progress; }