diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c
index df2f050138e..f14fcdbfa02 100644
--- a/src/intel/compiler/brw/brw_nir.c
+++ b/src/intel/compiler/brw/brw_nir.c
@@ -1377,27 +1377,6 @@ brw_nir_optimize(nir_shader *nir,
       if (nir->info.stage != MESA_SHADER_KERNEL)
          LOOP_OPT(nir_split_array_vars, nir_var_function_temp);
       LOOP_OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
-      LOOP_OPT(nir_opt_deref);
-      if (LOOP_OPT(nir_opt_memcpy))
-         LOOP_OPT(nir_split_var_copies);
-      LOOP_OPT(nir_lower_vars_to_ssa);
-      if (!nir->info.var_copies_lowered) {
-         /* Only run this pass if nir_lower_var_copies was not called
-          * yet. That would lower away any copy_deref instructions and we
-          * don't want to introduce any more.
-          */
-         LOOP_OPT(nir_opt_find_array_copies);
-      }
-      LOOP_OPT(nir_opt_copy_prop_vars);
-      LOOP_OPT(nir_opt_dead_write_vars);
-      LOOP_OPT(nir_opt_combine_stores, nir_var_all);
-
-      LOOP_OPT(nir_opt_ray_queries);
-      LOOP_OPT(nir_opt_ray_query_ranges);
-
-      LOOP_OPT(nir_lower_alu_to_scalar, NULL, NULL);
-
-      LOOP_OPT(nir_opt_copy_prop);
 
       LOOP_OPT(nir_lower_phis_to_scalar, NULL, NULL);
 
@@ -1406,39 +1385,21 @@ brw_nir_optimize(nir_shader *nir,
       LOOP_OPT(nir_opt_cse);
       LOOP_OPT(nir_opt_combine_stores, nir_var_all);
 
-      /* Passing 0 to the peephole select pass causes it to convert
-       * if-statements that contain only move instructions in the branches
-       * regardless of the count.
-       *
-       * Passing 1 to the peephole select pass causes it to convert
-       * if-statements that contain at most a single ALU instruction (total)
-       * in both branches. Before Gfx6, some math instructions were
-       * prohibitively expensive and the results of compare operations need an
-       * extra resolve step. For these reasons, this pass is more harmful
-       * than good on those platforms.
-       *
-       * For indirect loads of uniforms (push constants), we assume that array
+      /* For indirect loads of uniforms (push constants), we assume that array
        * indices will nearly always be in bounds and the cost of the load is
-       * low.  Therefore there shouldn't be a performance benefit to avoid it.
+       * low. Therefore there shouldn't be a performance benefit to avoid it.
        */
       nir_opt_peephole_select_options peephole_select_options = {
-         .limit = 0,
+         .limit = 8,
          .indirect_load_ok = true,
+         .expensive_alu_ok = true,
+         .discard_ok = true,
       };
       LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
 
-      peephole_select_options.limit = 8;
-      peephole_select_options.expensive_alu_ok = true;
-      LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);
-
       LOOP_OPT(nir_opt_intrinsics);
-      LOOP_OPT(nir_opt_idiv_const, 32);
       LOOP_OPT_NOT_IDEMPOTENT(nir_opt_algebraic);
-      LOOP_OPT(nir_opt_generate_bfi);
-      LOOP_OPT(nir_opt_reassociate_bfi);
-
-      LOOP_OPT(nir_lower_constant_convert_alu_types);
       LOOP_OPT(nir_opt_constant_folding);
 
       LOOP_OPT(nir_opt_dead_cf);
@@ -1452,24 +1413,13 @@ brw_nir_optimize(nir_shader *nir,
       }
       LOOP_OPT_NOT_IDEMPOTENT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
-      nir_opt_peephole_select_options peephole_discard_options = {
-         .limit = 0,
-         .discard_ok = true,
-      };
-      LOOP_OPT(nir_opt_peephole_select, &peephole_discard_options);
 
       if (nir->options->max_unroll_iterations != 0) {
          LOOP_OPT_NOT_IDEMPOTENT(nir_opt_loop_unroll);
       }
       LOOP_OPT(nir_opt_remove_phis);
       LOOP_OPT(nir_opt_gcm, false);
       LOOP_OPT(nir_opt_undef);
-      LOOP_OPT(nir_lower_pack);
    } while (progress);
-
-   /* Workaround Gfxbench unused local sampler variable which will trigger an
-    * assert in the opt_large_constants pass.
-    */
-   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
 }
 
 static unsigned
@@ -1681,12 +1631,34 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    OPT(nir_normalize_cubemap_coords);
 
    OPT(nir_lower_global_vars_to_local);
 
+   OPT(nir_lower_pack);
+   OPT(nir_lower_constant_convert_alu_types);
    OPT(nir_split_var_copies);
    OPT(nir_split_struct_vars, nir_var_function_temp);
 
+   if (OPT(nir_opt_memcpy))
+      OPT(nir_split_var_copies);
+
+   OPT(nir_lower_vars_to_ssa);
+
+   /* Run this pass before nir_lower_var_copies: it introduces copy_derefs. */
+   OPT(nir_opt_find_array_copies);
+
    brw_nir_optimize(nir, devinfo);
 
+   if (nir->info.ray_queries) {
+      OPT(nir_opt_ray_queries);
+      OPT(nir_opt_ray_query_ranges);
+   }
+
+   OPT(nir_opt_deref);
+   OPT(nir_opt_copy_prop_vars);
+   OPT(nir_opt_dead_write_vars);
+
+   OPT(nir_lower_vars_to_ssa);
+   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
+
    unsigned lower_flrp =
       (nir->options->lower_flrp16 ? 16 : 0) |
       (nir->options->lower_flrp32 ? 32 : 0) |
@@ -1740,7 +1712,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
 
    nir_variable_mode indirect_mask =
       brw_nir_no_indirect_mask(compiler, nir->info.stage);
-   OPT(nir_lower_indirect_derefs_to_if_else_trees, indirect_mask, UINT32_MAX);
+   if (OPT(nir_lower_indirect_derefs_to_if_else_trees, indirect_mask, UINT32_MAX))
+      OPT(nir_lower_vars_to_ssa);
 
    /* Even in cases where we can handle indirect temporaries via scratch, we
    * it can still be expensive. Lower indirects on small arrays to
@@ -1755,8 +1728,10 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    * issues are helped but nothing else in shader-db is hurt except for maybe
    * that one kerbal space program shader.
    */
-   if (!(indirect_mask & nir_var_function_temp))
-      OPT(nir_lower_indirect_derefs_to_if_else_trees, nir_var_function_temp, 16);
+   if (!(indirect_mask & nir_var_function_temp)) {
+      if (OPT(nir_lower_indirect_derefs_to_if_else_trees, nir_var_function_temp, 16))
+         OPT(nir_lower_vars_to_ssa);
+   }
 
    /* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and
    * SSBOs, our back-end is capable of loading an entire vec4 at a time and
@@ -1765,9 +1740,12 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    * optimizer to combine UBO and SSBO load operations and save us some send
    * messages.
    */
-   OPT(nir_lower_array_deref_of_vec,
+   if (OPT(nir_lower_array_deref_of_vec,
        nir_var_mem_ubo | nir_var_mem_ssbo, NULL,
-       nir_lower_direct_array_deref_of_vec_load);
+       nir_lower_direct_array_deref_of_vec_load)) {
+
+      OPT(nir_opt_copy_prop_vars);
+   }
 
    /* Clamp load_per_vertex_input of the TCS stage so that we do not generate
    * loads reading out of bounds. We can do this here because we called
@@ -1934,14 +1912,18 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
    NIR_PASS(_, producer, nir_lower_io_vars_to_scalar, nir_var_shader_out);
    NIR_PASS(_, consumer, nir_lower_io_vars_to_scalar, nir_var_shader_in);
 
+   NIR_PASS(_, producer, nir_opt_copy_prop_vars);
+   NIR_PASS(_, consumer, nir_opt_copy_prop_vars);
    brw_nir_optimize(producer, devinfo);
    brw_nir_optimize(consumer, devinfo);
 
    if (nir_link_opt_varyings(producer, consumer))
      brw_nir_optimize(consumer, devinfo);
 
-   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
-   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
+   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out |
+            nir_var_function_temp, NULL);
+   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in |
+            nir_var_function_temp, NULL);
 
    if (nir_remove_unused_varyings(producer, consumer)) {
       if (should_print_nir(producer)) {
@@ -1955,6 +1937,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
 
       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
       NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
 
+      NIR_PASS(_, producer, nir_opt_copy_prop_vars);
+      NIR_PASS(_, consumer, nir_opt_copy_prop_vars);
       brw_nir_optimize(producer, devinfo);
       brw_nir_optimize(consumer, devinfo);
@@ -1992,6 +1976,7 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
       NIR_PASS(_, producer, nir_split_var_copies);
       NIR_PASS(_, producer, nir_lower_var_copies);
+      NIR_PASS(_, producer, nir_lower_vars_to_ssa);
    }
 
    if (producer->info.stage == MESA_SHADER_TASK &&
@@ -2258,7 +2243,7 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
                                const struct brw_compiler *compiler,
                                enum brw_robustness_flags robust_flags)
 {
-   bool progress = false;
+   UNUSED bool progress = false;
 
    nir_load_store_vectorize_options options = {
       .modes = nir_var_mem_ubo | nir_var_mem_ssbo |
@@ -2321,17 +2306,11 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
       .cb_data = &cb_data,
    };
    OPT(nir_lower_mem_access_bit_sizes, &mem_access_options);
-
-   while (progress) {
-      progress = false;
-
-      OPT(nir_lower_pack);
-      OPT(nir_opt_copy_prop);
-      OPT(nir_opt_dce);
-      OPT(nir_opt_cse);
-      OPT(nir_opt_algebraic);
-      OPT(nir_opt_constant_folding);
-   }
+   OPT(nir_lower_pack);
+   OPT(nir_opt_copy_prop);
+   OPT(nir_opt_dce);
+   OPT(nir_opt_algebraic);
+   OPT(nir_opt_cse);
 
    /* Do this after the vectorization & brw_nir_rebase_const_offset_ubo_loads
    * so that we maximize the offset put into the messages.
@@ -2565,6 +2544,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
 
    OPT(brw_nir_lower_texture);
    OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
+   OPT(nir_lower_alu_to_scalar, NULL, NULL);
 
    OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);
 
@@ -2573,9 +2553,10 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_opt_algebraic_before_ffma);
    } while (progress);
 
+   OPT(nir_opt_idiv_const, 32);
+
    if (devinfo->verx10 >= 125) {
       /* Lower integer division by constants before nir_lower_idiv. */
-      OPT(nir_opt_idiv_const, 32);
       const nir_lower_idiv_options options = {
          .allow_fp16 = false
       };
@@ -2641,6 +2622,8 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
 
    OPT(nir_opt_shrink_vectors, false);
    OPT(intel_nir_opt_peephole_imul32x16);
+   OPT(nir_opt_generate_bfi);
+   OPT(nir_opt_reassociate_bfi);
 
    if (OPT(nir_opt_comparison_pre)) {
       OPT(nir_opt_copy_prop);
@@ -2653,33 +2636,22 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
       * might be under the threshold of conversion to bcsel.
       */
      nir_opt_peephole_select_options peephole_select_options = {
-         .limit = 0,
+         .limit = 1,
+         .expensive_alu_ok = true,
      };
      OPT(nir_opt_peephole_select, &peephole_select_options);
-
-      peephole_select_options.limit = 1;
-      peephole_select_options.expensive_alu_ok = true;
-      OPT(nir_opt_peephole_select, &peephole_select_options);
   }
 
-   do {
-      progress = false;
-
-      OPT(brw_nir_opt_fsat);
-      OPT(nir_opt_algebraic_late);
-      OPT(brw_nir_lower_fsign);
-
-      if (progress) {
-         OPT(nir_opt_constant_folding);
-         OPT(nir_opt_copy_prop);
-         OPT(nir_opt_dce);
-         OPT(nir_opt_cse);
-      }
-   } while (progress);
+   OPT(brw_nir_lower_fsign);
+   OPT(brw_nir_opt_fsat);
+   while (OPT(nir_opt_algebraic_late)) {
+      OPT(nir_opt_copy_prop);
+      OPT(nir_opt_dce);
+      OPT(nir_opt_cse);
+   }
 
    OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64);
-   OPT(nir_lower_alu_to_scalar, NULL, NULL);
 
    while (OPT(nir_opt_algebraic_distribute_src_mods)) {
@@ -2816,12 +2788,9 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir,
    }
 
    OPT(nir_convert_from_ssa, true, true);
-
+   OPT(nir_opt_rematerialize_compares);
    OPT(nir_opt_dce);
 
-   if (OPT(nir_opt_rematerialize_compares))
-      OPT(nir_opt_dce);
-
    nir_trivialize_registers(nir);
 
    nir_sweep(nir);
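Note on the OPT()/LOOP_OPT() helpers used throughout these hunks: their definitions live earlier in brw_nir.c and are not part of this patch. The sketch below is an assumed illustration of the idiom they implement, not the file's actual macro bodies: each invocation wraps NIR_PASS on the function-local `nir` shader and folds the per-pass result into the function-local `progress` flag, which is what the surrounding do { ... } while (progress) loops test to rerun the pass list until a fixed point.

/* Illustrative sketch only -- not the real macro from brw_nir.c.
 * Assumes a function-local `nir_shader *nir` and `bool progress`,
 * mirroring how brw_nir_optimize() and friends are structured.
 */
#define OPT(pass, ...) ({                                   \
   bool _this_progress = false;                             \
   NIR_PASS(_this_progress, nir, pass, ##__VA_ARGS__);      \
   if (_this_progress)                                      \
      progress = true;                                      \
   _this_progress;                                          \
})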
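The peephole-select rework in brw_nir_optimize() is easier to read outside diff form: what used to be two back-to-back nir_opt_peephole_select calls (limit 0, then limit 8 with expensive_alu_ok) plus a separate discard-only variant later in the loop collapses into one configuration. The snippet below is reassembled from the hunks above as a sketch of how the resulting loop body reads after the patch; treat it as illustrative rather than a quote of the final file.

/* Single consolidated peephole-select invocation inside the
 * do { ... } while (progress) loop of brw_nir_optimize().
 */
nir_opt_peephole_select_options peephole_select_options = {
   .limit = 8,               /* flatten ifs containing up to 8 instructions */
   .indirect_load_ok = true, /* indirect push-constant loads are cheap enough */
   .expensive_alu_ok = true,
   .discard_ok = true,       /* covers the removed peephole_discard_options block */
};
LOOP_OPT(nir_opt_peephole_select, &peephole_select_options);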