brw: Have brw_nir_apply_key call brw_nir_lower_simd for all stages

brw_nir_apply_key typically knows the dispatch width (it's fixed for
geometry stages, and we clone the NIR for compute and mesh shaders).
For compute/mesh, SIMD lowering was the very next pass called anyway.
For the other stages, since we already know the width, there's no reason
not to lower it here as well.

Scratch lowering will start using load_simd_width_intel soon, so we
need it to work in all stages.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40843>
This commit is contained in:
Kenneth Graunke 2026-04-08 01:47:32 -07:00 committed by Marge Bot
parent 765d74eebe
commit e5598166b0
4 changed files with 10 additions and 13 deletions

View file

@ -190,8 +190,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
BRW_NIR_SNAPSHOT("first");
brw_nir_apply_key(pt, &key->base, dispatch_width);
BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
brw_nir_optimize(pt);
/* brw_nir_optimize undoes late lowerings. */
BRW_NIR_PASS(nir_opt_algebraic_late);

View file

@ -362,8 +362,6 @@ brw_compile_task(const struct brw_compiler *compiler,
BRW_NIR_SNAPSHOT("first");
brw_nir_apply_key(pt, &key->base, dispatch_width);
BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
brw_nir_optimize(pt);
/* brw_nir_optimize undoes late lowerings. */
BRW_NIR_PASS(nir_opt_algebraic_late);
@ -1100,8 +1098,6 @@ brw_compile_mesh(const struct brw_compiler *compiler,
/* Load uniforms can do a better job for constants, so fold before it. */
BRW_NIR_PASS(nir_opt_constant_folding);
BRW_NIR_PASS(brw_nir_lower_simd, dispatch_width);
brw_nir_optimize(pt);
/* brw_nir_optimize undoes late lowerings. */
BRW_NIR_PASS(nir_opt_algebraic_late);

View file

@ -3038,8 +3038,6 @@ brw_nir_apply_key(brw_pass_tracker *pt,
pt->progress = false;
unsigned subgroup_size = get_subgroup_size(&nir->info, max_subgroup_size);
/* VS/TCS/TES/GS always run at a fixed SIMD width, which is what our
* max_subgroup_size parameter represents. Compute/Mesh can run at
* different sizes, but we clone the NIR for each SIMD width, and pass
@ -3052,6 +3050,8 @@ brw_nir_apply_key(brw_pass_tracker *pt,
if (nir->info.stage != MESA_SHADER_FRAGMENT) {
nir->info.min_subgroup_size = max_subgroup_size;
nir->info.max_subgroup_size = max_subgroup_size;
OPT(brw_nir_lower_simd);
}
const nir_lower_subgroups_options subgroups_options = {
@ -3442,13 +3442,16 @@ filter_simd(const nir_instr *instr, UNUSED const void *options)
static nir_def *
lower_simd(nir_builder *b, nir_instr *instr, void *options)
{
uintptr_t simd_width = (uintptr_t)options;
unsigned simd_width = b->shader->info.max_subgroup_size;
assert(b->shader->info.min_subgroup_size == simd_width);
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
case nir_intrinsic_load_simd_width_intel:
return nir_imm_int(b, simd_width);
case nir_intrinsic_load_subgroup_id:
assert(mesa_shader_stage_uses_workgroup(b->shader->info.stage));
/* If the whole workgroup fits in one thread, we can lower subgroup_id
* to a constant zero.
*/
@ -3464,10 +3467,10 @@ lower_simd(nir_builder *b, nir_instr *instr, void *options)
}
bool
brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width)
brw_nir_lower_simd(nir_shader *nir)
{
return nir_shader_lower_instructions(nir, filter_simd, lower_simd,
(void *)(uintptr_t)dispatch_width);
return nir->info.min_subgroup_size == nir->info.max_subgroup_size &&
nir_shader_lower_instructions(nir, filter_simd, lower_simd, NULL);
}
nir_variable *

View file

@ -275,7 +275,7 @@ bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader,
const struct
intel_device_info *devinfo);
bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
bool brw_nir_lower_simd(nir_shader *nir);
void brw_postprocess_nir_opts(struct brw_pass_tracker *pt);