diff --git a/src/intel/compiler/brw/brw_compile_bs.cpp b/src/intel/compiler/brw/brw_compile_bs.cpp
index f279d9c2a77..2e92357019f 100644
--- a/src/intel/compiler/brw/brw_compile_bs.cpp
+++ b/src/intel/compiler/brw/brw_compile_bs.cpp
@@ -83,13 +83,17 @@ compile_single_bs(const struct brw_compiler *compiler,
     */
    const unsigned required_width = compiler->devinfo->ver >= 20 ? 16u : 8u;
 
-   brw_nir_apply_key(shader, compiler, &key->base, required_width);
+   brw_pass_tracker pt_ = {
+      .nir = shader,
+      .dispatch_width = required_width,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
 
-   brw_debug_archive_nir(params->base.archiver, shader, required_width, "first");
+   BRW_NIR_SNAPSHOT("first");
+   brw_nir_apply_key(pt, &key->base, required_width);
 
-   brw_postprocess_nir(shader, compiler, required_width,
-                       params->base.archiver, debug_enabled,
-                       key->base.robust_flags);
+   brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags);
 
    const brw_shader_params shader_params = {
       .compiler                = compiler,
diff --git a/src/intel/compiler/brw/brw_compile_cs.cpp b/src/intel/compiler/brw/brw_compile_cs.cpp
index 4325d4335b4..5ea2f1ce115 100644
--- a/src/intel/compiler/brw/brw_compile_cs.cpp
+++ b/src/intel/compiler/brw/brw_compile_cs.cpp
@@ -141,7 +141,15 @@ brw_compile_cs(const struct brw_compiler *compiler,
       prog_data->local_size[2] = nir->info.workgroup_size[2];
    }
 
-   brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags);
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = 0,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
+
+   BRW_NIR_SNAPSHOT("first");
+   brw_postprocess_nir_opts(pt, key->base.robust_flags);
 
    brw_simd_selection_state simd_state{
       .devinfo = compiler->devinfo,
@@ -170,18 +178,23 @@ brw_compile_cs(const struct brw_compiler *compiler,
       const unsigned dispatch_width = 8u << simd;
 
       nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
-      brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first");
 
-      brw_nir_apply_key(shader, compiler, &key->base,
-                        dispatch_width);
+      pt_ = {
+         .nir = shader,
+         .dispatch_width = dispatch_width,
+         .compiler = compiler,
+         .archiver = params->base.archiver,
+      };
 
-      NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);
+      BRW_NIR_SNAPSHOT("first");
+      brw_nir_apply_key(pt, &key->base, dispatch_width);
 
-      brw_nir_optimize(shader, devinfo);
+      BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
+
+      brw_nir_optimize(pt);
       /* brw_nir_optimize undoes late lowerings. */
-      NIR_PASS(_, shader, nir_opt_algebraic_late);
-      brw_postprocess_nir_out_of_ssa(shader, dispatch_width,
-                                     params->base.archiver, debug_enabled);
+      BRW_NIR_PASS(nir_opt_algebraic_late);
+      brw_postprocess_nir_out_of_ssa(pt, debug_enabled);
 
       const brw_shader_params shader_params = {
          .compiler                = compiler,
diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp
index eb2f3c27935..61c0320db21 100644
--- a/src/intel/compiler/brw/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw/brw_compile_fs.cpp
@@ -1423,17 +1423,16 @@ brw_print_fs_urb_setup(FILE *fp, const struct brw_wm_prog_data *prog_data,
 }
 
 static void
-brw_nir_cleanup_pre_wm_prog_data(nir_shader *nir)
+brw_nir_cleanup_pre_wm_prog_data(brw_pass_tracker *pt)
 {
-   bool progress;
    do {
-      progress = false;
-      NIR_PASS(progress, nir, nir_opt_algebraic);
-      NIR_PASS(progress, nir, nir_opt_copy_prop);
-      NIR_PASS(progress, nir, nir_opt_constant_folding);
-      NIR_PASS(progress, nir, nir_opt_dce);
-      NIR_PASS(progress, nir, nir_opt_cse);
-   } while (progress);
+      pt->progress = false;
+      BRW_NIR_PASS(nir_opt_algebraic);
+      BRW_NIR_PASS(nir_opt_copy_prop);
+      BRW_NIR_PASS(nir_opt_constant_folding);
+      BRW_NIR_PASS(nir_opt_dce);
+      BRW_NIR_PASS(nir_opt_cse);
+   } while (pt->progress);
 }
 
 const unsigned *
@@ -1455,17 +1454,24 @@ brw_compile_fs(const struct brw_compiler *compiler,
    const unsigned max_subgroup_size = 32;
    unsigned max_polygons = MAX2(1, params->max_polygons);
 
-   brw_debug_archive_nir(params->base.archiver, nir, 0, "first");
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = 0,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
 
-   brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size);
+   BRW_NIR_SNAPSHOT("first");
+
+   brw_nir_apply_key(pt, &key->base, max_subgroup_size);
 
    if (brw_nir_fragment_shader_needs_wa_18019110168(devinfo, key->mesh_input, nir)) {
       if (params->mue_map && params->mue_map->wa_18019110168_active) {
          brw_nir_frag_convert_attrs_prim_to_vert(
             nir, params->mue_map->per_primitive_offsets);
       } else {
-         NIR_PASS(_, nir, brw_nir_frag_convert_attrs_prim_to_vert_indirect,
-                  devinfo, params);
+         BRW_NIR_PASS(brw_nir_frag_convert_attrs_prim_to_vert_indirect,
+                      devinfo, params);
       }
       /* Remapping per-primitive inputs into unused per-vertex inputs cannot
        * work with multipolygon.
@@ -1476,16 +1482,18 @@ brw_compile_fs(const struct brw_compiler *compiler,
    brw_nir_lower_fs_inputs(nir, devinfo, key);
    brw_nir_lower_fs_outputs(nir);
 
+   BRW_NIR_SNAPSHOT("after_lower_io");
+
    if (!brw_can_coherent_fb_fetch(devinfo))
-      NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
+      BRW_NIR_PASS(brw_nir_lower_fs_load_output, key);
 
    /* Do this lowering before brw_nir_populate_wm_prog_data(). */
-   NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord);
-   NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord);
+   BRW_NIR_PASS(nir_opt_frag_coord_to_pixel_coord);
+   BRW_NIR_PASS(nir_lower_frag_coord_to_pixel_coord);
 
-   NIR_PASS(_, nir, brw_nir_move_interpolation_to_top);
+   BRW_NIR_PASS(brw_nir_move_interpolation_to_top);
 
-   brw_nir_cleanup_pre_wm_prog_data(nir);
+   brw_nir_cleanup_pre_wm_prog_data(pt);
 
    int per_primitive_offsets[VARYING_SLOT_MAX];
    memset(per_primitive_offsets, -1, sizeof(per_primitive_offsets));
@@ -1503,12 +1511,12 @@ brw_compile_fs(const struct brw_compiler *compiler,
        * offset to determine render target 0 store instruction in
        * emit_alpha_to_coverage pass.
        */
-      NIR_PASS(_, nir, nir_opt_constant_folding);
-      NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage);
+      BRW_NIR_PASS(nir_opt_constant_folding);
+      BRW_NIR_PASS(brw_nir_lower_alpha_to_coverage);
    }
 
    if (prog_data->coarse_pixel_dispatch != INTEL_NEVER)
-      NIR_PASS(_, nir, brw_nir_lower_frag_coord_z, devinfo);
+      BRW_NIR_PASS(brw_nir_lower_frag_coord_z, devinfo);
 
    if (!brw_wm_prog_key_is_dynamic(key)) {
       uint32_t f = 0;
@@ -1530,10 +1538,10 @@ brw_compile_fs(const struct brw_compiler *compiler,
       if (prog_data->coarse_pixel_dispatch == INTEL_ALWAYS)
          f |= INTEL_MSAA_FLAG_COARSE_RT_WRITES;
 
-      NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_fs_msaa_intel, f);
+      BRW_NIR_PASS(nir_inline_sysval, nir_intrinsic_load_fs_msaa_intel, f);
    }
 
-   brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags);
+   brw_postprocess_nir_opts(pt, key->base.robust_flags);
 
    unsigned pressure[SIMD_COUNT];
    brw_nir_quick_pressure_estimate(nir, devinfo, pressure);
@@ -1544,8 +1552,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
          pressure[i] > compiler->register_pressure_threshold;
    }
 
-   brw_postprocess_nir_out_of_ssa(nir, 0, params->base.archiver,
-                                  debug_enabled);
+   brw_postprocess_nir_out_of_ssa(pt, debug_enabled);
 
    if (unlikely(debug_enabled))
       brw_print_fs_urb_setup(stderr, prog_data, per_primitive_offsets);
diff --git a/src/intel/compiler/brw/brw_compile_gs.cpp b/src/intel/compiler/brw/brw_compile_gs.cpp
index 3feb982dd66..b9c79742fa6 100644
--- a/src/intel/compiler/brw/brw_compile_gs.cpp
+++ b/src/intel/compiler/brw/brw_compile_gs.cpp
@@ -148,7 +148,14 @@ brw_compile_gs(const struct brw_compiler *compiler,
 
    const bool debug_enabled = brw_should_print_shader(nir, DEBUG_GS, params->base.source_hash);
 
-   brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first");
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = dispatch_width,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
+
+   BRW_NIR_SNAPSHOT("first");
 
    brw_prog_data_init(&prog_data->base.base, &params->base);
 
@@ -175,14 +182,15 @@ brw_compile_gs(const struct brw_compiler *compiler,
                        key->base.vue_layout,
                        pos_slots);
 
-   brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
+   brw_nir_apply_key(pt, &key->base, dispatch_width);
    brw_nir_lower_gs_inputs(nir, compiler->devinfo, &input_vue_map,
                            &prog_data->base.urb_read_length);
    brw_nir_lower_vue_outputs(nir);
-   brw_nir_opt_vectorize_urb(nir, compiler->devinfo);
-   brw_postprocess_nir(nir, compiler, dispatch_width,
-                       params->base.archiver, debug_enabled,
-                       key->base.robust_flags);
+
+   BRW_NIR_SNAPSHOT("after_lower_io");
+
+   brw_nir_opt_vectorize_urb(pt);
+   brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags);
 
    prog_data->include_primitive_id =
       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp
index 5379316c96a..38510232c40 100644
--- a/src/intel/compiler/brw/brw_compile_mesh.cpp
+++ b/src/intel/compiler/brw/brw_compile_mesh.cpp
@@ -90,21 +90,23 @@ brw_nir_lower_launch_mesh_workgroups(nir_shader *nir)
 #define BRW_PER_TASK_DATA_START_DW 8
 
 static void
-brw_nir_lower_tue_outputs(nir_shader *nir, brw_tue_map *map)
+brw_nir_lower_tue_outputs(brw_pass_tracker *pt, brw_tue_map *map)
 {
+   nir_shader *nir = pt->nir;
+
    memset(map, 0, sizeof(*map));
 
-   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out,
-            type_size_scalar_dwords, nir_lower_io_lower_64bit_to_32);
+   BRW_NIR_PASS(nir_lower_io, nir_var_shader_out,
+                type_size_scalar_dwords, nir_lower_io_lower_64bit_to_32);
 
    /* Lowering to explicit types will start offsets from task_payload_size, so
     * set it to start after the header.
     */
    nir->info.task_payload_size = BRW_PER_TASK_DATA_START_DW * 4;
-   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-            nir_var_mem_task_payload, shared_type_info);
-   NIR_PASS(_, nir, nir_lower_explicit_io,
-            nir_var_mem_task_payload, nir_address_format_32bit_offset);
+   BRW_NIR_PASS(nir_lower_vars_to_explicit_types,
+                nir_var_mem_task_payload, shared_type_info);
+   BRW_NIR_PASS(nir_lower_explicit_io,
+                nir_var_mem_task_payload, nir_address_format_32bit_offset);
 
    map->size_dw = align(DIV_ROUND_UP(nir->info.task_payload_size, 4), 8);
 }
@@ -279,11 +281,18 @@ brw_compile_task(const struct brw_compiler *compiler,
    struct brw_task_prog_data *prog_data = params->prog_data;
    const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TASK, params->base.source_hash);
 
-   brw_debug_archive_nir(params->base.archiver, nir, 0, "first");
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = 0,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
 
-   brw_nir_lower_tue_outputs(nir, &prog_data->map);
+   BRW_NIR_SNAPSHOT("first");
 
-   NIR_PASS(_, nir, brw_nir_align_launch_mesh_workgroups);
+   brw_nir_lower_tue_outputs(pt, &prog_data->map);
+
+   BRW_NIR_PASS(brw_nir_align_launch_mesh_workgroups);
 
    nir_lower_task_shader_options lower_ts_opt = {
       .payload_to_shared_for_atomics = true,
@@ -293,9 +302,9 @@ brw_compile_task(const struct brw_compiler *compiler,
        */
       .payload_offset_in_bytes = BRW_PER_TASK_DATA_START_DW * 4,
    };
-   NIR_PASS(_, nir, nir_lower_task_shader, lower_ts_opt);
+   BRW_NIR_PASS(nir_lower_task_shader, lower_ts_opt);
 
-   NIR_PASS(_, nir, brw_nir_lower_launch_mesh_workgroups);
+   BRW_NIR_PASS(brw_nir_lower_launch_mesh_workgroups);
 
    NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo,
             NULL);
@@ -312,7 +321,7 @@ brw_compile_task(const struct brw_compiler *compiler,
    prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) ||
                                       key->base.uses_inline_push_addr;
 
-   brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags);
+   brw_postprocess_nir_opts(pt, key->base.robust_flags);
 
    brw_simd_selection_state simd_state{
       .devinfo = compiler->devinfo,
@@ -320,7 +329,7 @@ brw_compile_task(const struct brw_compiler *compiler,
       .required_width = brw_required_dispatch_width(&nir->info),
    };
 
-   brw_debug_archive_nir(params->base.archiver, nir, 0, "before-simd");
+   BRW_NIR_SNAPSHOT("before_simd");
 
    unsigned pressure[SIMD_COUNT];
    brw_nir_quick_pressure_estimate(nir, devinfo, pressure);
@@ -341,17 +350,23 @@ brw_compile_task(const struct brw_compiler *compiler,
       const unsigned dispatch_width = 8 << simd;
 
       nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
-      brw_nir_apply_key(shader, compiler, &key->base, dispatch_width);
 
-      brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first");
+      pt_ = {
+         .nir = shader,
+         .dispatch_width = dispatch_width,
+         .compiler = compiler,
+         .archiver = params->base.archiver,
+      };
 
-      NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);
+      BRW_NIR_SNAPSHOT("first");
+      brw_nir_apply_key(pt, &key->base, dispatch_width);
 
-      brw_nir_optimize(shader, devinfo);
+      BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
+
+      brw_nir_optimize(pt);
       /* brw_nir_optimize undoes late lowerings. */
-      NIR_PASS(_, shader, nir_opt_algebraic_late);
-      brw_postprocess_nir_out_of_ssa(shader, dispatch_width,
-                                     params->base.archiver, debug_enabled);
+      BRW_NIR_PASS(nir_opt_algebraic_late);
+      brw_postprocess_nir_out_of_ssa(pt, debug_enabled);
 
       const brw_shader_params shader_params = {
          .compiler                = compiler,
@@ -416,7 +431,7 @@ brw_compile_task(const struct brw_compiler *compiler,
 }
 
 static void
-brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map)
+brw_nir_lower_tue_inputs(brw_pass_tracker *pt, const brw_tue_map *map)
 {
    /* See brw_nir_lower_tue_outputs. If a task payload is read by this shader,
     * task_payload_size will be used to start offsets, and that's always
@@ -424,12 +439,11 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map)
     * We can't always use map, as it may not be present if task and mesh
     * shaders are not compiled together. This is possible with shader objects.
     */
+   nir_shader *nir = pt->nir;
    nir->info.task_payload_size = BRW_PER_TASK_DATA_START_DW * 4;
 
-   bool progress = false;
-
-   NIR_PASS(progress, nir, nir_lower_vars_to_explicit_types,
-            nir_var_mem_task_payload, shared_type_info);
+   bool progress = BRW_NIR_PASS(nir_lower_vars_to_explicit_types,
+                                nir_var_mem_task_payload, shared_type_info);
 
    if (progress) {
       /* The types for Task Output and Mesh Input should match, so their sizes
@@ -444,8 +458,8 @@ brw_nir_lower_tue_inputs(nir_shader *nir, const brw_tue_map *map)
       nir->info.task_payload_size = 0;
    }
 
-   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_task_payload,
-            nir_address_format_32bit_offset);
+   BRW_NIR_PASS(nir_lower_explicit_io, nir_var_mem_task_payload,
+                nir_address_format_32bit_offset);
 }
 
 /* Attribute types. Flat attributes have to be a separate class because
@@ -612,11 +626,11 @@ brw_print_mue_map(FILE *fp, const struct brw_mue_map *map, struct nir_shader *ni
 }
 
 static void
-brw_nir_lower_mue_outputs(nir_shader *nir, const struct brw_mue_map *map)
+brw_nir_lower_mue_outputs(brw_pass_tracker *pt, const struct brw_mue_map *map)
 {
-   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out,
-            type_size_vec4,
-            nir_lower_io_lower_64bit_to_32);
+   BRW_NIR_PASS(nir_lower_io, nir_var_shader_out,
+                type_size_vec4,
+                nir_lower_io_lower_64bit_to_32);
 }
 
 static bool
@@ -965,7 +979,14 @@ brw_compile_mesh(const struct brw_compiler *compiler,
    struct brw_mesh_prog_data *prog_data = params->prog_data;
    const bool debug_enabled = brw_should_print_shader(nir, DEBUG_MESH, params->base.source_hash);
 
-   brw_debug_archive_nir(params->base.archiver, nir, 0, "first");
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = 0,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
+
+   BRW_NIR_SNAPSHOT("first");
 
    brw_prog_data_init(&prog_data->base.base, &params->base);
 
@@ -1000,23 +1021,23 @@ brw_compile_mesh(const struct brw_compiler *compiler,
    prog_data->uses_drawid =
       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
 
-   brw_nir_lower_tue_inputs(nir, params->tue_map);
+   brw_nir_lower_tue_inputs(pt, params->tue_map);
 
-   NIR_PASS(_, nir, brw_nir_lower_mesh_primitive_count);
-   NIR_PASS(_, nir, nir_opt_dce);
-   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
+   BRW_NIR_PASS(brw_nir_lower_mesh_primitive_count);
+   BRW_NIR_PASS(nir_opt_dce);
+   BRW_NIR_PASS(nir_remove_dead_variables, nir_var_shader_out, NULL);
 
    brw_compute_mue_map(compiler, nir, &prog_data->map,
                        prog_data->index_format,
                        key->base.vue_layout,
                        apply_wa_18019110168 ? wa_18019110168_mapping : NULL);
-   brw_nir_lower_mue_outputs(nir, &prog_data->map);
+   brw_nir_lower_mue_outputs(pt, &prog_data->map);
 
    /* When Primitive Header is enabled, we may not generates writes to all
     * fields, so let's initialize everything.
     */
    if (prog_data->map.has_per_primitive_header)
-      NIR_PASS(_, nir, brw_nir_initialize_mue, &prog_data->map);
+      BRW_NIR_PASS(brw_nir_initialize_mue, &prog_data->map);
 
    NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo,
             NULL);
@@ -1026,7 +1047,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
    prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) ||
                                       key->base.uses_inline_push_addr;
 
-   brw_postprocess_nir_opts(nir, compiler, key->base.robust_flags);
+   brw_postprocess_nir_opts(pt, key->base.robust_flags);
 
    const struct brw_lower_urb_cb_data cb_data = {
       .devinfo = devinfo,
@@ -1039,10 +1060,10 @@ brw_compile_mesh(const struct brw_compiler *compiler,
          prog_data->map.per_primitive_indices_stride,
       .per_primitive_byte_offsets = prog_data->map.per_primitive_offsets,
    };
-   NIR_PASS(_, nir, brw_nir_lower_outputs_to_urb_intrinsics, &cb_data);
-   brw_nir_opt_vectorize_urb(nir, devinfo);
+   BRW_NIR_PASS(brw_nir_lower_outputs_to_urb_intrinsics, &cb_data);
+   brw_nir_opt_vectorize_urb(pt);
    struct nir_opt_offsets_options offset_options = {};
-   NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
+   BRW_NIR_PASS(nir_opt_offsets, &offset_options);
 
    brw_simd_selection_state simd_state{
       .devinfo = compiler->devinfo,
@@ -1052,7 +1073,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
 
    std::unique_ptr<brw_shader> v[3];
 
-   brw_debug_archive_nir(params->base.archiver, nir, 0, "before-simd");
+   BRW_NIR_SNAPSHOT("before_simd");
 
    for (unsigned i = 0; i < 3; i++) {
       const unsigned simd = devinfo->ver >= 30 ? 2 - i : i;
@@ -1064,20 +1085,25 @@ brw_compile_mesh(const struct brw_compiler *compiler,
 
       nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
 
-      brw_debug_archive_nir(params->base.archiver, shader, dispatch_width, "first");
+      pt_ = {
+         .nir = shader,
+         .dispatch_width = dispatch_width,
+         .compiler = compiler,
+         .archiver = params->base.archiver,
+      };
 
-      brw_nir_apply_key(shader, compiler, &key->base, dispatch_width);
+      BRW_NIR_SNAPSHOT("first");
+      brw_nir_apply_key(pt, &key->base, dispatch_width);
 
       /* Load uniforms can do a better job for constants, so fold before it. */
-      NIR_PASS(_, shader, nir_opt_constant_folding);
+      BRW_NIR_PASS(nir_opt_constant_folding);
 
-      NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);
+      BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
 
-      brw_nir_optimize(shader, devinfo);
+      brw_nir_optimize(pt);
       /* brw_nir_optimize undoes late lowerings. */
-      NIR_PASS(_, shader, nir_opt_algebraic_late);
-      brw_postprocess_nir_out_of_ssa(shader, dispatch_width,
-                                     params->base.archiver, debug_enabled);
+      BRW_NIR_PASS(nir_opt_algebraic_late);
+      brw_postprocess_nir_out_of_ssa(pt, debug_enabled);
 
       const brw_shader_params shader_params = {
          .compiler                = compiler,
diff --git a/src/intel/compiler/brw/brw_compile_tcs.cpp b/src/intel/compiler/brw/brw_compile_tcs.cpp
index fb450065f16..dd62ee40c4d 100644
--- a/src/intel/compiler/brw/brw_compile_tcs.cpp
+++ b/src/intel/compiler/brw/brw_compile_tcs.cpp
@@ -211,7 +211,14 @@ brw_compile_tcs(const struct brw_compiler *compiler,
 
    const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TCS, params->base.source_hash);
 
-   brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first");
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = dispatch_width,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
+
+   BRW_NIR_SNAPSHOT("first");
 
    brw_prog_data_init(&prog_data->base.base, &params->base);
 
@@ -229,16 +236,16 @@ brw_compile_tcs(const struct brw_compiler *compiler,
                             nir->info.patch_outputs_written,
                             key->separate_tess_vue_layout);
 
-   brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
+   brw_nir_apply_key(pt, &key->base, dispatch_width);
    brw_nir_lower_tcs_inputs(nir, devinfo, &input_vue_map);
    brw_nir_lower_tcs_outputs(nir, devinfo, &vue_prog_data->vue_map,
                              key->_tes_primitive_mode);
-   brw_nir_opt_vectorize_urb(nir, devinfo);
-   intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
+   BRW_NIR_SNAPSHOT("after_lower_io");
 
-   brw_postprocess_nir(nir, compiler, dispatch_width,
-                       params->base.archiver, debug_enabled,
-                       key->base.robust_flags);
+   brw_nir_opt_vectorize_urb(pt);
+   BRW_NIR_PASS(intel_nir_lower_patch_vertices_in, key->input_vertices);
+
+   brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags);
 
    bool has_primitive_id =
       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
diff --git a/src/intel/compiler/brw/brw_compile_tes.cpp b/src/intel/compiler/brw/brw_compile_tes.cpp
index 21c52a98f3b..2ed998fb29f 100644
--- a/src/intel/compiler/brw/brw_compile_tes.cpp
+++ b/src/intel/compiler/brw/brw_compile_tes.cpp
@@ -88,7 +88,14 @@ brw_compile_tes(const struct brw_compiler *compiler,
 
    const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TES, params->base.source_hash);
 
-   brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first");
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = dispatch_width,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
+
+   BRW_NIR_SNAPSHOT("first");
 
    brw_prog_data_init(&prog_data->base.base, &params->base);
 
@@ -104,14 +111,15 @@ brw_compile_tes(const struct brw_compiler *compiler,
                                nir->info.patch_inputs_read,
                                key->separate_tess_vue_layout);
    }
-
-   brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
+   brw_nir_apply_key(pt, &key->base, dispatch_width);
    brw_nir_lower_tes_inputs(nir, devinfo, &input_vue_map);
    brw_nir_lower_vue_outputs(nir);
-   brw_nir_opt_vectorize_urb(nir, devinfo);
-   NIR_PASS(_, nir, intel_nir_lower_patch_vertices_tes);
-   brw_postprocess_nir(nir, compiler, dispatch_width, params->base.archiver,
-                       debug_enabled, key->base.robust_flags);
+   BRW_NIR_SNAPSHOT("after_lower_io");
+
+   brw_nir_opt_vectorize_urb(pt);
+   BRW_NIR_PASS(intel_nir_lower_patch_vertices_tes);
+
+   brw_postprocess_nir(pt, debug_enabled, key->base.robust_flags);
 
    const uint32_t pos_slots =
       (nir->info.per_view_outputs & VARYING_BIT_POS) ?
diff --git a/src/intel/compiler/brw/brw_compile_vs.cpp b/src/intel/compiler/brw/brw_compile_vs.cpp
index e9bb184a4b5..526d0091ae7 100644
--- a/src/intel/compiler/brw/brw_compile_vs.cpp
+++ b/src/intel/compiler/brw/brw_compile_vs.cpp
@@ -253,7 +253,14 @@ brw_compile_vs(const struct brw_compiler *compiler,
     */
    assert(!key->no_vf_slot_compaction || key->vf_component_packing);
 
-   brw_debug_archive_nir(params->base.archiver, nir, dispatch_width, "first");
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .dispatch_width = dispatch_width,
+      .compiler = compiler,
+      .archiver = params->base.archiver,
+   }, *pt = &pt_;
+
+   BRW_NIR_SNAPSHOT("first");
 
    brw_prog_data_init(&prog_data->base.base, &params->base);
 
@@ -271,7 +278,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
                        &prog_data->base.vue_map, nir->info.outputs_written,
                        key->base.vue_layout, pos_slots);
 
-   brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
+   brw_nir_apply_key(pt, &key->base, dispatch_width);
 
    prog_data->inputs_read = nir->info.inputs_read;
    prog_data->double_inputs_read = nir->info.vs.double_inputs;
@@ -279,6 +286,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
 
    brw_nir_lower_vs_inputs(nir);
    brw_nir_lower_vue_outputs(nir);
+   BRW_NIR_SNAPSHOT("after_lower_io");
 
    memset(prog_data->vf_component_packing, 0,
           sizeof(prog_data->vf_component_packing));
@@ -286,8 +294,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
    if (key->vf_component_packing)
       nr_packed_regs = brw_nir_pack_vs_input(nir, prog_data);
 
-   brw_postprocess_nir(nir, compiler, dispatch_width,
-                       params->base.archiver, debug_enabled,
+   brw_postprocess_nir(pt, debug_enabled,
                        key->base.robust_flags);
 
    unsigned nr_attribute_slots = util_bitcount64(prog_data->inputs_read);
diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c
index 772b50d6f2c..9c1c02bc4ce 100644
--- a/src/intel/compiler/brw/brw_nir.c
+++ b/src/intel/compiler/brw/brw_nir.c
@@ -816,10 +816,11 @@ lsc_urb_round_up_components(unsigned n)
 }
 
 void
-brw_nir_opt_vectorize_urb(nir_shader *nir,
-                          const struct intel_device_info *devinfo)
+brw_nir_opt_vectorize_urb(brw_pass_tracker *pt)
 {
-   NIR_PASS(_, nir, nir_opt_cse);
+   const struct intel_device_info *devinfo = pt->compiler->devinfo;
+
+   BRW_NIR_PASS(nir_opt_cse);
 
    nir_load_store_vectorize_options options = {
       .modes = nir_var_shader_in | nir_var_shader_out,
@@ -829,7 +830,7 @@ brw_nir_opt_vectorize_urb(nir_shader *nir,
          devinfo->ver >= 20 ? lsc_urb_round_up_components :
                               vec4_urb_round_up_components,
    };
-   NIR_PASS(_, nir, nir_opt_load_store_vectorize, &options);
+   BRW_NIR_PASS(nir_opt_load_store_vectorize, &options);
 }
 
 void
@@ -1423,45 +1424,19 @@ brw_nir_tag_speculative_access(nir_shader *nir)
                                      nir_metadata_all, NULL);
 }
 
-#define OPT(pass, ...) ({                                  \
-   bool this_progress = false;                             \
-   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
-   if (this_progress)                                      \
-      progress = true;                                     \
-   this_progress;                                          \
-})
-
-#define LOOP_OPT(pass, ...) ({                             \
-   const unsigned long this_line = __LINE__;               \
-   bool this_progress = false;                             \
-   if (opt_line == this_line)                              \
-      break;                                               \
-   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
-   if (this_progress) {                                    \
-      progress = true;                                     \
-      opt_line = this_line;                                \
-   }                                                       \
-   this_progress;                                          \
-})
-
-#define LOOP_OPT_NOT_IDEMPOTENT(pass, ...) ({              \
-   bool this_progress = false;                             \
-   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
-   if (this_progress) {                                    \
-      progress = true;                                     \
-      opt_line = 0;                                        \
-   }                                                       \
-   this_progress;                                          \
-})
+#define OPT BRW_NIR_PASS
+#define LOOP_OPT BRW_NIR_LOOP_PASS
+#define LOOP_OPT_NOT_IDEMPOTENT BRW_NIR_LOOP_PASS_NOT_IDEMPOTENT
 
 void
-brw_nir_optimize(nir_shader *nir,
-                 const struct intel_device_info *devinfo)
+brw_nir_optimize(brw_pass_tracker *pt)
 {
-   bool progress;
-   unsigned long opt_line = 0;
+   nir_shader *nir = pt->nir;
+
+   pass_tracker_new_loop(pt);
    do {
-      progress = false;
+      pass_tracker_new_iteration(pt);
+
       /* This pass is causing problems with types used by OpenCL :
        *    https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13955
        *
@@ -1529,7 +1504,7 @@ brw_nir_optimize(nir_shader *nir,
       LOOP_OPT(nir_opt_gcm, false);
       LOOP_OPT(nir_opt_undef);
       LOOP_OPT(nir_lower_pack);
-   } while (progress);
+   } while (pt->progress);
 
    /* Workaround Gfxbench unused local sampler variable which will trigger an
     * assert in the opt_large_constants pass.
@@ -1718,7 +1693,15 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
                    const struct brw_nir_compiler_opts *opts)
 {
    const struct intel_device_info *devinfo = compiler->devinfo;
-   UNUSED bool progress; /* Written by OPT */
+
+   /* TODO: This is part of the "pre-processing" before the shader is fed to
+    * brw_compile_* functions, so there's no debug archiver available yet.
+    * In the future runtime/driver will create one for us to use here.
+    */
+   brw_pass_tracker pt_ = {
+      .nir = nir,
+      .compiler = compiler,
+   }, *pt = &pt_;
 
    nir_validate_ssa_dominance(nir, "before brw_preprocess_nir");
 
@@ -1753,7 +1736,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
    if (OPT(nir_opt_memcpy))
       OPT(nir_split_var_copies);
 
-   brw_nir_optimize(nir, devinfo);
+   brw_nir_optimize(pt);
 
    if (nir->info.ray_queries) {
       OPT(nir_opt_ray_queries);
@@ -1853,7 +1836,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
       OPT(intel_nir_clamp_per_vertex_loads);
 
    /* Get rid of split copies */
-   brw_nir_optimize(nir, devinfo);
+   brw_nir_optimize(pt);
 }
 
 static bool
@@ -1969,8 +1952,6 @@ void
 brw_nir_link_shaders(const struct brw_compiler *compiler,
                      nir_shader *producer, nir_shader *consumer)
 {
-   const struct intel_device_info *devinfo = compiler->devinfo;
-
    if (producer->info.stage == MESA_SHADER_MESH &&
        consumer->info.stage == MESA_SHADER_FRAGMENT) {
       uint64_t fs_inputs = 0, ms_outputs = 0;
@@ -2009,11 +1990,19 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
 
    NIR_PASS(_, producer, nir_lower_io_vars_to_scalar, nir_var_shader_out);
    NIR_PASS(_, consumer, nir_lower_io_vars_to_scalar, nir_var_shader_in);
-   brw_nir_optimize(producer, devinfo);
-   brw_nir_optimize(consumer, devinfo);
+
+   /* TODO: This is part of the "pre-processing" before the shader is fed to
+    * brw_compile_* functions, so there's no debug archiver available yet.
+    * In the future runtime/driver will create one for us to use here.
+    */
+   brw_pass_tracker pt_producer = { .nir = producer, .compiler = compiler };
+   brw_pass_tracker pt_consumer = { .nir = consumer, .compiler = compiler };
+
+   brw_nir_optimize(&pt_producer);
+   brw_nir_optimize(&pt_consumer);
 
    if (nir_link_opt_varyings(producer, consumer))
-      brw_nir_optimize(consumer, devinfo);
+      brw_nir_optimize(&pt_consumer);
 
    NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
    NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
@@ -2031,8 +2020,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
       NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
 
-      brw_nir_optimize(producer, devinfo);
-      brw_nir_optimize(consumer, devinfo);
+      brw_nir_optimize(&pt_producer);
+      brw_nir_optimize(&pt_consumer);
 
       if (producer->info.stage == MESA_SHADER_MESH &&
             consumer->info.stage == MESA_SHADER_FRAGMENT) {
@@ -2329,11 +2318,10 @@ brw_nir_ssbo_intel(nir_shader *shader)
 }
 
 static void
-brw_vectorize_lower_mem_access(nir_shader *nir,
-                               const struct brw_compiler *compiler,
+brw_vectorize_lower_mem_access(brw_pass_tracker *pt,
                                enum brw_robustness_flags robust_flags)
 {
-   UNUSED bool progress = false;
+   const struct brw_compiler *compiler = pt->compiler;
 
    nir_load_store_vectorize_options options = {
       .modes = nir_var_mem_ubo | nir_var_mem_ssbo |
@@ -2569,10 +2557,8 @@ flag_fused_eu_disable_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 static void
-brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo)
+brw_nir_lower_int64(brw_pass_tracker *pt)
 {
-   UNUSED bool progress; /* Written by OPT */
-
    /* Potentially perform this optimization pass twice because it can create
     * additional opportunities for itself.
     */
@@ -2580,7 +2566,7 @@ brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo)
       OPT(nir_opt_algebraic_before_lower_int64);
 
    if (OPT(nir_lower_int64))
-      brw_nir_optimize(nir, devinfo);
+      brw_nir_optimize(pt);
 }
 
 /* Prepare the given shader for codegen
@@ -2589,12 +2575,12 @@ brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo)
  * backend and is highly backend-specific.
  */
 void
-brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
+brw_postprocess_nir_opts(brw_pass_tracker *pt,
                          enum brw_robustness_flags robust_flags)
 {
+   const struct brw_compiler *compiler = pt->compiler;
    const struct intel_device_info *devinfo = compiler->devinfo;
-
-   UNUSED bool progress; /* Written by OPT */
+   nir_shader *nir = pt->nir;
 
    const nir_lower_tex_options tex_options = {
       .lower_txp = ~0,
@@ -2637,10 +2623,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
 
    OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);
 
-   do {
-      progress = false;
-      OPT(nir_opt_algebraic_before_ffma);
-   } while (progress);
+   while (OPT(nir_opt_algebraic_before_ffma)) {}
 
    OPT(nir_opt_idiv_const, 32);
 
@@ -2665,23 +2648,23 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
 
    OPT(brw_nir_tag_speculative_access);
 
-   brw_nir_optimize(nir, devinfo);
+   brw_nir_optimize(pt);
 
    if (nir_shader_has_local_variables(nir)) {
       OPT(nir_lower_vars_to_explicit_types, nir_var_function_temp,
           glsl_get_natural_size_align_bytes);
       OPT(nir_lower_explicit_io, nir_var_function_temp,
           nir_address_format_32bit_offset);
-      brw_nir_optimize(nir, devinfo);
+      brw_nir_optimize(pt);
    }
 
-   brw_vectorize_lower_mem_access(nir, compiler, robust_flags);
+   brw_vectorize_lower_mem_access(pt, robust_flags);
 
    /* Do this after lowering memory access bit-sizes */
    if (nir->info.stage == MESA_SHADER_MESH ||
        nir->info.stage == MESA_SHADER_TASK) {
       OPT(lower_task_payload_to_urb_intrinsics, devinfo);
-      brw_nir_opt_vectorize_urb(nir, devinfo);
+      brw_nir_opt_vectorize_urb(pt);
    }
 
    /* Needs to be prior int64 lower because it generates 64bit address
@@ -2689,7 +2672,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
     */
    OPT(intel_nir_lower_printf);
 
-   brw_nir_lower_int64(nir, devinfo);
+   brw_nir_lower_int64(pt);
 
    /* This pass specifically looks for sequences of fmul and fadd that
     * intel_nir_opt_peephole_ffma will try to eliminate. Call this
@@ -2736,18 +2719,17 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
    OPT(brw_nir_opt_fsat);
 
    do {
-      progress = false;
+      pt->progress = false;
 
       OPT(nir_opt_algebraic_late);
 
-      if (progress) {
+      if (pt->progress) {
          OPT(nir_opt_constant_folding);
          OPT(nir_opt_copy_prop);
          OPT(nir_opt_dce);
          OPT(nir_opt_cse);
       }
-   } while (progress);
-
+   } while (pt->progress);
 
    OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64);
 
@@ -2791,7 +2773,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
        * allows the elimination of some loops over, say, a TXF instruction
        * with a non-uniform texture handle.
        */
-      brw_nir_optimize(nir, devinfo);
+      brw_nir_optimize(pt);
 
       OPT(nir_lower_subgroups, &subgroups_options);
    }
@@ -2801,7 +2783,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
     * and peephole_select may generate a 64-bit select.  So do another
     * round at the tail end.
     */
-   brw_nir_lower_int64(nir, devinfo);
+   brw_nir_lower_int64(pt);
 
    /* Deal with EU fusion */
    if (devinfo->ver == 12) {
@@ -2828,12 +2810,10 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
 }
 
 void
-brw_postprocess_nir_out_of_ssa(nir_shader *nir,
-                               unsigned dispatch_width,
-                               debug_archiver *archiver,
+brw_postprocess_nir_out_of_ssa(brw_pass_tracker *pt,
                                bool debug_enabled)
 {
-   UNUSED bool progress; /* Written by OPT */
+   nir_shader *nir = pt->nir;
 
    /* Run fsign lowering again after the last time brw_nir_optimize is called.
     * As is the case with conversion lowering (below), brw_nir_optimize can
@@ -2867,10 +2847,10 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir,
    /* Rerun the divergence analysis before convert_from_ssa as this pass has
     * some assert on consistent divergence flags.
     */
-   NIR_PASS(_, nir, nir_convert_to_lcssa, true, true);
+   OPT(nir_convert_to_lcssa, true, true);
    nir_divergence_analysis(nir);
 
-   if (unlikely(debug_enabled || archiver)) {
+   if (unlikely(debug_enabled || pt->archiver)) {
       /* Re-index SSA defs so we print more sensible numbers. */
       nir_foreach_function_impl(impl, nir) {
          nir_index_ssa_defs(impl);
@@ -2882,8 +2862,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir,
          nir_print_shader(nir, stderr);
       }
 
-      if (unlikely(archiver))
-         brw_debug_archive_nir(archiver, nir, dispatch_width, "ssa");
+      BRW_NIR_SNAPSHOT("ssa");
    }
 
    OPT(nir_convert_from_ssa, true, true);
@@ -2900,8 +2879,7 @@ brw_postprocess_nir_out_of_ssa(nir_shader *nir,
       nir_print_shader(nir, stderr);
    }
 
-   if (unlikely(archiver))
-      brw_debug_archive_nir(archiver, nir, dispatch_width, "out");
+   BRW_NIR_SNAPSHOT("out");
 }
 
 static unsigned
@@ -2944,12 +2922,13 @@ brw_nir_api_subgroup_size(const nir_shader *nir,
 }
 
 void
-brw_nir_apply_key(nir_shader *nir,
-                  const struct brw_compiler *compiler,
+brw_nir_apply_key(brw_pass_tracker *pt,
                   const struct brw_base_prog_key *key,
                   unsigned max_subgroup_size)
 {
-   bool progress = false;
+   nir_shader *nir = pt->nir;
+
+   pt->progress = false;
 
    const nir_lower_subgroups_options subgroups_options = {
       .subgroup_size = get_subgroup_size(&nir->info, max_subgroup_size),
@@ -2962,9 +2941,8 @@ brw_nir_apply_key(nir_shader *nir,
    if (key->limit_trig_input_range)
       OPT(brw_nir_limit_trig_input_range_workaround);
 
-   if (progress) {
-      brw_nir_optimize(nir, compiler->devinfo);
-   }
+   if (pt->progress)
+      brw_nir_optimize(pt);
 }
 
 enum brw_conditional_mod
diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h
index 52a140750a0..978ad981e87 100644
--- a/src/intel/compiler/brw/brw_nir.h
+++ b/src/intel/compiler/brw/brw_nir.h
@@ -17,6 +17,7 @@ extern "C" {
 #define BRW_TEX_INSTR_FUSED_EU_DISABLE (1u << 30)
 
 extern const struct nir_shader_compiler_options brw_scalar_nir_options;
+struct brw_pass_tracker;
 
 void
 brw_fill_tess_info_from_shader_info(struct brw_tess_info *brw_info,
@@ -219,8 +220,7 @@ bool brw_nir_lower_inputs_to_urb_intrinsics(nir_shader *, const struct brw_lower
 
 bool brw_nir_lower_outputs_to_urb_intrinsics(nir_shader *, const struct brw_lower_urb_cb_data *);
 
-void brw_nir_opt_vectorize_urb(nir_shader *nir,
-                               const struct intel_device_info *devinfo);
+void brw_nir_opt_vectorize_urb(struct brw_pass_tracker *pt);
 
 void brw_nir_lower_vs_inputs(nir_shader *nir);
 void brw_nir_lower_gs_inputs(nir_shader *nir,
@@ -285,26 +285,19 @@ bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader,
 
 bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
 
-void brw_postprocess_nir_opts(nir_shader *nir,
-                              const struct brw_compiler *compiler,
+void brw_postprocess_nir_opts(struct brw_pass_tracker *pt,
                               enum brw_robustness_flags robust_flags);
 
-void brw_postprocess_nir_out_of_ssa(nir_shader *nir,
-                                    unsigned dispatch_width,
-                                    debug_archiver *archiver,
+void brw_postprocess_nir_out_of_ssa(struct brw_pass_tracker *pt,
                                     bool debug_enabled);
 
 static inline void
-brw_postprocess_nir(nir_shader *nir,
-                    const struct brw_compiler *compiler,
-                    unsigned dispatch_width,
-                    debug_archiver *archiver,
+brw_postprocess_nir(struct brw_pass_tracker *pt,
                     bool debug_enabled,
                     enum brw_robustness_flags robust_flags)
 {
-   brw_postprocess_nir_opts(nir, compiler, robust_flags);
-   brw_postprocess_nir_out_of_ssa(nir, dispatch_width, archiver,
-                                  debug_enabled);
+   brw_postprocess_nir_opts(pt, robust_flags);
+   brw_postprocess_nir_out_of_ssa(pt, debug_enabled);
 }
 
 bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
@@ -320,8 +313,7 @@ bool brw_nir_lower_fsign(nir_shader *nir);
 
 bool brw_nir_opt_fsat(nir_shader *);
 
-void brw_nir_apply_key(nir_shader *nir,
-                       const struct brw_compiler *compiler,
+void brw_nir_apply_key(struct brw_pass_tracker *pt,
                        const struct brw_base_prog_key *key,
                        unsigned max_subgroup_size);
 
@@ -349,8 +341,7 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
 bool brw_nir_lower_ubo_ranges(nir_shader *nir,
                               struct brw_ubo_range out_ranges[4]);
 
-void brw_nir_optimize(nir_shader *nir,
-                      const struct intel_device_info *devinfo);
+void brw_nir_optimize(struct brw_pass_tracker *pt);
 
 nir_shader *brw_nir_create_passthrough_tcs(void *mem_ctx,
                                            const struct brw_compiler *compiler,
diff --git a/src/intel/compiler/brw/brw_nir_rt.c b/src/intel/compiler/brw/brw_nir_rt.c
index 4435d57a1e7..184657d5434 100644
--- a/src/intel/compiler/brw/brw_nir_rt.c
+++ b/src/intel/compiler/brw/brw_nir_rt.c
@@ -7,6 +7,7 @@
 #include "brw_nir_rt.h"
 #include "brw_nir_rt_builder.h"
 #include "intel_nir.h"
+#include "brw_private.h"
 
 static bool
 resize_deref(nir_builder *b, nir_deref_instr *deref,
@@ -499,7 +500,12 @@ brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
 
    NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
 
-   brw_nir_optimize(nir, devinfo);
+   brw_pass_tracker pt = {
+      .nir = nir,
+      .compiler = compiler,
+   };
+
+   brw_nir_optimize(&pt);
 
    return nir;
 }
diff --git a/src/intel/compiler/brw/brw_private.h b/src/intel/compiler/brw/brw_private.h
index 62f628ef856..398a5233130 100644
--- a/src/intel/compiler/brw/brw_private.h
+++ b/src/intel/compiler/brw/brw_private.h
@@ -19,15 +19,84 @@ void brw_alloc_reg_sets(struct brw_compiler *compiler);
 extern const char *const conditional_modifier[16];
 extern const char *const pred_ctrl_align16[16];
 
+typedef struct brw_pass_tracker {
+   nir_shader *nir;
+   unsigned dispatch_width;
+
+   const struct brw_compiler *compiler;
+
+   bool progress;
+
+   /* Filled with the last line that made progress.
+    * Used to perform early break in loops.
+    * See BRW_NIR_LOOP_PASS macros below.
+    */
+   unsigned long opt_line;
+
+   /* Tracking information for the debug archiver. */
+   unsigned pass_num;
+   debug_archiver *archiver;
+} brw_pass_tracker;
+
 #ifndef NDEBUG
-void brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir,
-                           unsigned dispatch_width, const char *step);
+void
+brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name);
 #else
 static inline void
-brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir,
-                      unsigned dispatch_width, const char *step) {}
+brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name)
+{}
 #endif
 
+/* To be used in conjunction to BRW_NIR_LOOP_* macros. */
+static inline void
+pass_tracker_new_loop(brw_pass_tracker *pt)
+{
+   pt->opt_line = 0;
+}
+
+/* To be used in conjunction to BRW_NIR_LOOP_* macros. */
+static inline void
+pass_tracker_new_iteration(brw_pass_tracker *pt)
+{
+   pt->progress = false;
+}
+
+#define BRW_NIR_SNAPSHOT(name) do {                        \
+   pt->pass_num++;                                         \
+   brw_pass_tracker_archive(pt, name);                     \
+} while (false);
+
+#define BRW_NIR_PASS(pass, ...) ({                         \
+   pt->pass_num++;                                         \
+   bool this_progress = false;                             \
+   NIR_PASS(this_progress, pt->nir, pass, ##__VA_ARGS__);  \
+   if (this_progress) {                                    \
+      pt->progress = true;                                 \
+      if (unlikely(pt->archiver))                          \
+         brw_pass_tracker_archive(pt, #pass);              \
+   }                                                       \
+   this_progress;                                          \
+})
+
+#define BRW_NIR_LOOP_PASS(pass, ...) ({                    \
+   const unsigned long this_line = __LINE__;               \
+   if (pt->opt_line == this_line) {                        \
+      pt->pass_num++;                                      \
+      break;                                               \
+   }                                                       \
+   bool this_progress = BRW_NIR_PASS(pass, ##__VA_ARGS__); \
+   if (this_progress)                                      \
+      pt->opt_line = this_line;                            \
+   this_progress;                                          \
+})
+
+#define BRW_NIR_LOOP_PASS_NOT_IDEMPOTENT(pass, ...) ({     \
+   bool this_progress = BRW_NIR_PASS(pass, ##__VA_ARGS__); \
+   if (this_progress)                                      \
+      pt->opt_line = 0;                                    \
+   this_progress;                                          \
+})
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/intel/compiler/brw/brw_shader.cpp b/src/intel/compiler/brw/brw_shader.cpp
index 265357ab81d..e541a1c0585 100644
--- a/src/intel/compiler/brw/brw_shader.cpp
+++ b/src/intel/compiler/brw/brw_shader.cpp
@@ -1317,21 +1317,18 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling)
 
 #ifndef NDEBUG
 void
-brw_debug_archive_nir(debug_archiver *archiver, nir_shader *nir,
-                      unsigned dispatch_width, const char *step)
+brw_pass_tracker_archive(brw_pass_tracker *pt, const char *pass_name)
 {
-   if (!archiver)
+   if (!pt->archiver)
       return;
 
-   const bool prefix_dispatch_width =
-      dispatch_width > 0 && mesa_shader_stage_uses_workgroup(nir->info.stage);
-   const char *filename = prefix_dispatch_width ?
-         ralloc_asprintf(archiver, "NIR%d/%s", dispatch_width, step) :
-         ralloc_asprintf(archiver, "NIR/%s", step);
+   const char *filename =
+      ralloc_asprintf(pt->archiver, "NIR%d/%03d-%s",
+                      pt->dispatch_width, pt->pass_num, pass_name);
 
-   FILE *f = debug_archiver_start_file(archiver, filename);
-   nir_print_shader(nir, f);
-   debug_archiver_finish_file(archiver);
+   FILE *f = debug_archiver_start_file(pt->archiver, filename);
+   nir_print_shader(pt->nir, f);
+   debug_archiver_finish_file(pt->archiver);
 }
 #endif