From 79d02047b88d59ea6cfea1688b656d88796ed32d Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 9 Sep 2025 18:24:08 +0200
Subject: [PATCH] intel: switch to new subgroup size info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Iván Briano
Acked-by: Timur Kristóf
Part-of:
---
 src/intel/blorp/blorp_brw.c                   |  7 ++--
 src/intel/compiler/brw_compile_fs.cpp         | 11 +++---
 src/intel/compiler/brw_nir.c                  | 32 ++++-------------
 src/intel/compiler/brw_simd_selection.cpp     |  7 ++--
 src/intel/compiler/elk/elk_nir.c              | 34 ++++---------------
 src/intel/compiler/elk/elk_simd_selection.cpp |  8 ++---
 src/intel/vulkan/anv_shader_compile.c         | 26 +++++---------
 src/intel/vulkan/anv_util.c                   |  8 ++---
 src/intel/vulkan_hasvk/anv_pipeline.c         | 19 +++--------
 9 files changed, 44 insertions(+), 108 deletions(-)

diff --git a/src/intel/blorp/blorp_brw.c b/src/intel/blorp/blorp_brw.c
index 94b1a009b16..653fcd47b40 100644
--- a/src/intel/blorp/blorp_brw.c
+++ b/src/intel/blorp/blorp_brw.c
@@ -36,8 +36,11 @@ blorp_compile_fs_brw(struct blorp_context *blorp, void *mem_ctx,
    brw_preprocess_nir(compiler, nir, &opts);
    nir_remove_dead_variables(nir, nir_var_shader_in, NULL);
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-   if (is_fast_clear || use_repclear)
-      nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
+   if (is_fast_clear || use_repclear) {
+      nir->info.api_subgroup_size = 16;
+      nir->info.max_subgroup_size = 16;
+      nir->info.min_subgroup_size = 16;
+   }
 
    struct brw_wm_prog_key wm_key;
    memset(&wm_key, 0, sizeof(wm_key));
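The old shader_info::subgroup_size enum packed several different meanings
(API constant, draw uniform, varying, require-N) into a single value. The
new info splits this into three plain integers: api_subgroup_size (the
size promised to the API, 0 if none) plus min_subgroup_size and
max_subgroup_size bounding what the backend may pick. A hard requirement
such as blorp's SIMD16 fast-clear path above is expressed by pinning all
three fields to one value. A minimal sketch of that encoding, using a
stand-in struct rather than Mesa's real shader_info:

   #include <assert.h>

   /* Stand-in for the relevant shader_info fields; illustration only. */
   struct subgroup_info {
      unsigned api_subgroup_size;  /* size promised to the API, 0 = none */
      unsigned min_subgroup_size;  /* smallest size the backend may pick */
      unsigned max_subgroup_size;  /* largest size the backend may pick */
   };

   /* Pin the subgroup size to one required value, as the blorp
    * fast-clear/repclear path does for SIMD16. */
   static void require_subgroup_size(struct subgroup_info *info, unsigned size)
   {
      info->api_subgroup_size = size;
      info->min_subgroup_size = size;
      info->max_subgroup_size = size;
   }

   int main(void)
   {
      struct subgroup_info info = {0};
      require_subgroup_size(&info, 16);
      /* A required size shows up as a collapsed [min, max] range. */
      assert(info.min_subgroup_size == info.max_subgroup_size);
      return 0;
   }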
diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp
index c184225f546..2299899b5e8 100644
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@@ -1576,8 +1576,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
     * data clear shaders.
     */
    const unsigned reqd_dispatch_width = brw_required_dispatch_width(&nir->info);
-   assert(reqd_dispatch_width == SUBGROUP_SIZE_VARYING ||
-          reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16);
+   assert(reqd_dispatch_width == 0 || reqd_dispatch_width == 16);
 
    /* Limit identified when first variant is compiled, see
     * brw_shader::limit_dispatch_width().
@@ -1750,7 +1749,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    } else {
       if ((!has_spilled &&
            dispatch_width_limit >= 16 && INTEL_SIMD(FS, 16)) ||
-          reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16) {
+          reqd_dispatch_width == 16) {
          /* Try a SIMD16 compile */
          brw_shader_params shader_params = base_shader_params;
          shader_params.dispatch_width = 16;
@@ -1783,7 +1782,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
       /* Currently, the compiler only supports SIMD32 on SNB+ */
       if (!has_spilled &&
           dispatch_width_limit >= 32 &&
-          reqd_dispatch_width == SUBGROUP_SIZE_VARYING &&
+          reqd_dispatch_width == 0 &&
           !simd16_failed &&
           INTEL_SIMD(FS, 32) && !prog_data->base.ray_queries) {
          /* Try a SIMD32 compile */
@@ -1818,7 +1817,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    if (devinfo->ver >= 12 && !has_spilled &&
        max_polygons >= 2 &&
        !key->coarse_pixel &&
-       reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
+       reqd_dispatch_width == 0) {
 
       if (devinfo->ver >= 20 && max_polygons >= 4 &&
           dispatch_width_limit >= 32 &&
@@ -1890,7 +1889,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    /* When the caller compiles a repclear or fast clear shader, they
    * want SIMD16-only.
    */
-   if (reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16)
+   if (reqd_dispatch_width == 16)
       v8.reset();
 
    brw_generator g(compiler, &params->base, &prog_data->base,
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 9d495ffa3ca..0cd4e8dae88 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -2426,12 +2426,11 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 static unsigned
 get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
 {
-   switch (info->subgroup_size) {
-   case SUBGROUP_SIZE_API_CONSTANT:
-      /* We have to use the global constant size. */
-      return BRW_SUBGROUP_SIZE;
-
-   case SUBGROUP_SIZE_UNIFORM:
+   if (info->api_subgroup_size) {
+      /* We have to use the global/required constant size. */
+      assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
+      return info->api_subgroup_size;
+   } else if (info->api_subgroup_size_draw_uniform) {
       /* It has to be uniform across all invocations but can vary per stage
        * if we want. This gives us a bit more freedom.
        *
@@ -2441,8 +2440,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * to be uniform across invocations.
        */
       return max_subgroup_size;
-
-   case SUBGROUP_SIZE_VARYING:
+   } else {
       /* The subgroup size is allowed to be fully varying. For geometry
        * stages, we know it's always 8 which is max_subgroup_size so we can
        * return that. For compute, brw_nir_apply_key is called once per
@@ -2454,25 +2452,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * size.
        */
       return info->stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
-
-   case SUBGROUP_SIZE_REQUIRE_4:
-      UNREACHABLE("Unsupported subgroup size type");
-
-   case SUBGROUP_SIZE_REQUIRE_8:
-   case SUBGROUP_SIZE_REQUIRE_16:
-   case SUBGROUP_SIZE_REQUIRE_32:
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return info->subgroup_size;
-
-   case SUBGROUP_SIZE_FULL_SUBGROUPS:
-   case SUBGROUP_SIZE_REQUIRE_64:
-   case SUBGROUP_SIZE_REQUIRE_128:
-      break;
    }
-
-   UNREACHABLE("Invalid subgroup size type");
 }
 
 unsigned
diff --git a/src/intel/compiler/brw_simd_selection.cpp b/src/intel/compiler/brw_simd_selection.cpp
index 7498353a51a..e9ae4d3ae95 100644
--- a/src/intel/compiler/brw_simd_selection.cpp
+++ b/src/intel/compiler/brw_simd_selection.cpp
@@ -30,11 +30,8 @@
 unsigned
 brw_required_dispatch_width(const struct shader_info *info)
 {
-   if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return (unsigned)info->subgroup_size;
+   if (info->min_subgroup_size == info->max_subgroup_size) {
+      return info->max_subgroup_size;
    } else {
       return 0;
    }
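With the range representation, "this shader requires exactly one dispatch
width" falls out as min == max, which is all the rewritten
brw_required_dispatch_width() has to test; 0 keeps meaning "no
requirement". A compile-and-run sketch of that test and of the three-way
ladder that replaced the switch in get_subgroup_size(), again on a
stand-in struct (the 8..32 bounds follow the assert in the hunk above;
is_fragment stands in for info->stage == MESA_SHADER_FRAGMENT):

   #include <assert.h>
   #include <stdbool.h>

   /* Stand-in for the relevant shader_info fields; illustration only. */
   struct subgroup_info {
      unsigned api_subgroup_size;
      unsigned min_subgroup_size;
      unsigned max_subgroup_size;
      bool api_subgroup_size_draw_uniform;
      bool is_fragment;
   };

   /* Mirrors brw_required_dispatch_width(): a collapsed range is a hard
    * requirement, anything else leaves the backend free to choose. */
   static unsigned required_dispatch_width(const struct subgroup_info *info)
   {
      return info->min_subgroup_size == info->max_subgroup_size ?
             info->max_subgroup_size : 0;
   }

   /* Mirrors the if/else ladder in get_subgroup_size(). */
   static unsigned subgroup_size(const struct subgroup_info *info,
                                 unsigned max_subgroup_size)
   {
      if (info->api_subgroup_size) {
         /* A size was promised to the API; honor it. */
         assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
         return info->api_subgroup_size;
      } else if (info->api_subgroup_size_draw_uniform) {
         /* Uniform within a draw, but free to vary per stage. */
         return max_subgroup_size;
      } else {
         /* Fully varying; fragment shaders defer the choice (0). */
         return info->is_fragment ? 0 : max_subgroup_size;
      }
   }

   int main(void)
   {
      struct subgroup_info varying  = { .min_subgroup_size = 8,
                                        .max_subgroup_size = 32 };
      struct subgroup_info required = { .api_subgroup_size = 16,
                                        .min_subgroup_size = 16,
                                        .max_subgroup_size = 16 };
      assert(required_dispatch_width(&varying) == 0);
      assert(required_dispatch_width(&required) == 16);
      assert(subgroup_size(&required, 32) == 16);
      return 0;
   }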
diff --git a/src/intel/compiler/elk/elk_nir.c b/src/intel/compiler/elk/elk_nir.c
index 4eff336b3b1..a94fe9d90d9 100644
--- a/src/intel/compiler/elk/elk_nir.c
+++ b/src/intel/compiler/elk/elk_nir.c
@@ -1683,12 +1683,11 @@ elk_nir_apply_sampler_key(nir_shader *nir,
 static unsigned
 get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
 {
-   switch (info->subgroup_size) {
-   case SUBGROUP_SIZE_API_CONSTANT:
-      /* We have to use the global constant size. */
-      return ELK_SUBGROUP_SIZE;
-
-   case SUBGROUP_SIZE_UNIFORM:
+   if (info->api_subgroup_size) {
+      /* We have to use the global/required constant size. */
+      assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
+      return info->api_subgroup_size;
+   } else if (info->api_subgroup_size_draw_uniform) {
       /* It has to be uniform across all invocations but can vary per stage
        * if we want. This gives us a bit more freedom.
        *
@@ -1698,8 +1697,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * to be uniform across invocations.
        */
       return max_subgroup_size;
-
-   case SUBGROUP_SIZE_VARYING:
+   } else {
       /* The subgroup size is allowed to be fully varying. For geometry
        * stages, we know it's always 8 which is max_subgroup_size so we can
        * return that. For compute, elk_nir_apply_key is called once per
@@ -1711,27 +1709,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * size.
        */
       return info->stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
-
-   case SUBGROUP_SIZE_REQUIRE_4:
-      UNREACHABLE("Unsupported subgroup size type");
-
-   case SUBGROUP_SIZE_REQUIRE_8:
-   case SUBGROUP_SIZE_REQUIRE_16:
-   case SUBGROUP_SIZE_REQUIRE_32:
-      assert(mesa_shader_stage_uses_workgroup(info->stage) ||
-             (info->stage >= MESA_SHADER_RAYGEN && info->stage <= MESA_SHADER_CALLABLE));
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return info->subgroup_size;
-
-   case SUBGROUP_SIZE_FULL_SUBGROUPS:
-   case SUBGROUP_SIZE_REQUIRE_64:
-   case SUBGROUP_SIZE_REQUIRE_128:
-      break;
    }
-
-   UNREACHABLE("Invalid subgroup size type");
 }
 
 unsigned
diff --git a/src/intel/compiler/elk/elk_simd_selection.cpp b/src/intel/compiler/elk/elk_simd_selection.cpp
index a418c48b7f8..f737171a8e0 100644
--- a/src/intel/compiler/elk/elk_simd_selection.cpp
+++ b/src/intel/compiler/elk/elk_simd_selection.cpp
@@ -30,12 +30,8 @@
 unsigned
 elk_required_dispatch_width(const struct shader_info *info)
 {
-   if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
-      assert(mesa_shader_stage_uses_workgroup(info->stage));
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return (unsigned)info->subgroup_size;
+   if (info->min_subgroup_size == info->max_subgroup_size) {
+      return info->max_subgroup_size;
    } else {
       return 0;
    }
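The elk changes mirror the brw ones one-for-one, so every state of the
old enum that these compilers supported reduces to a combination of the
three integers plus the draw-uniform flag. Roughly, and only as an
illustration (the hunks above pin down the tests, not the exact min/max
bounds each state carries, so those bounds are an assumption here, using
the 8..32 range Gfx hardware supports):

   #include <stdbool.h>

   struct subgroup_info {
      unsigned api_subgroup_size;
      unsigned min_subgroup_size;
      unsigned max_subgroup_size;
      bool api_subgroup_size_draw_uniform;
   };

   /* SUBGROUP_SIZE_API_CONSTANT: one global size promised to the API,
    * but the range stays open so no dispatch width is forced. */
   static const struct subgroup_info api_constant = {
      .api_subgroup_size = 32, .min_subgroup_size = 8, .max_subgroup_size = 32,
   };

   /* SUBGROUP_SIZE_UNIFORM: uniform per draw, backend picks per stage. */
   static const struct subgroup_info draw_uniform = {
      .min_subgroup_size = 8, .max_subgroup_size = 32,
      .api_subgroup_size_draw_uniform = true,
   };

   /* SUBGROUP_SIZE_VARYING: no constraint at all. */
   static const struct subgroup_info varying = {
      .min_subgroup_size = 8, .max_subgroup_size = 32,
   };

   /* SUBGROUP_SIZE_REQUIRE_16: collapsed range, also visible to the API. */
   static const struct subgroup_info require_16 = {
      .api_subgroup_size = 16, .min_subgroup_size = 16, .max_subgroup_size = 16,
   };

   int main(void)
   {
      (void)api_constant; (void)draw_uniform; (void)varying; (void)require_16;
      return 0;
   }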
diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c
index b4c44b13055..e4b583a688f 100644
--- a/src/intel/vulkan/anv_shader_compile.c
+++ b/src/intel/vulkan/anv_shader_compile.c
@@ -652,29 +652,21 @@ anv_fixup_subgroup_size(struct anv_instance *instance, struct shader_info *info)
     */
    if (instance->assume_full_subgroups &&
        info->uses_wide_subgroup_intrinsics &&
-       info->subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
+       info->api_subgroup_size == BRW_SUBGROUP_SIZE &&
        local_size &&
-       local_size % BRW_SUBGROUP_SIZE == 0)
-      info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
-
-   /* If the client requests that we dispatch full subgroups but doesn't
-    * allow us to pick a subgroup size, we have to smash it to the API
-    * value of 32. Performance will likely be terrible in this case but
-    * there's nothing we can do about that. The client should have chosen
-    * a size.
-    */
-   if (info->subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
-      info->subgroup_size =
-         instance->assume_full_subgroups != 0 ?
-         instance->assume_full_subgroups : BRW_SUBGROUP_SIZE;
+       local_size % BRW_SUBGROUP_SIZE == 0) {
+      info->max_subgroup_size = BRW_SUBGROUP_SIZE;
+      info->min_subgroup_size = BRW_SUBGROUP_SIZE;
+   }
 
    /* Cooperative matrix extension requires that all invocations in a subgroup
     * be active. As a result, when the application does not request a specific
     * subgroup size, we must use SIMD32.
     */
    if (info->stage == MESA_SHADER_COMPUTE &&
        info->cs.has_cooperative_matrix &&
-       info->subgroup_size < SUBGROUP_SIZE_REQUIRE_8) {
-      info->subgroup_size = BRW_SUBGROUP_SIZE;
+       info->max_subgroup_size > info->min_subgroup_size) {
+      info->api_subgroup_size = info->max_subgroup_size;
+      info->min_subgroup_size = info->max_subgroup_size;
    }
 }
@@ -1244,7 +1236,7 @@ anv_shader_lower_nir(struct anv_device *device,
    if (nir->info.stage == MESA_SHADER_COMPUTE &&
        nir->info.cs.has_cooperative_matrix) {
       anv_fixup_subgroup_size(pdevice->instance, &nir->info);
-      NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.subgroup_size);
+      NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.api_subgroup_size);
       NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 16);
    }
diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c
index efe0d8909d3..7b85aff1951 100644
--- a/src/intel/vulkan/anv_util.c
+++ b/src/intel/vulkan/anv_util.c
@@ -360,10 +360,10 @@ anv_device_init_rt_shaders(struct anv_device *device)
       nir_shader *trampoline_nir =
          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
 
-      if (device->info->ver >= 20)
-         trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
-      else
-         trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+      unsigned require_size = device->info->ver >= 20 ? 16 : 8;
+      trampoline_nir->info.api_subgroup_size = require_size;
+      trampoline_nir->info.max_subgroup_size = require_size;
+      trampoline_nir->info.min_subgroup_size = require_size;
 
       struct brw_cs_prog_data trampoline_prog_data = {
          .uses_btd_stack_ids = true,
diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c
index aa3863962dd..50a2dc24486 100644
--- a/src/intel/vulkan_hasvk/anv_pipeline.c
+++ b/src/intel/vulkan_hasvk/anv_pipeline.c
@@ -1516,21 +1516,12 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
     */
    if (device->physical->instance->assume_full_subgroups &&
        stage.nir->info.uses_wide_subgroup_intrinsics &&
-       stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
+       stage.nir->info.api_subgroup_size == ELK_SUBGROUP_SIZE &&
        local_size &&
-       local_size % ELK_SUBGROUP_SIZE == 0)
-      stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
-
-   /* If the client requests that we dispatch full subgroups but doesn't
-    * allow us to pick a subgroup size, we have to smash it to the API
-    * value of 32. Performance will likely be terrible in this case but
-    * there's nothing we can do about that. The client should have chosen
-    * a size.
-    */
-   if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
-      stage.nir->info.subgroup_size =
-         device->physical->instance->assume_full_subgroups != 0 ?
-         device->physical->instance->assume_full_subgroups : ELK_SUBGROUP_SIZE;
+       local_size % ELK_SUBGROUP_SIZE == 0) {
+      stage.nir->info.max_subgroup_size = ELK_SUBGROUP_SIZE;
+      stage.nir->info.min_subgroup_size = ELK_SUBGROUP_SIZE;
+   }
 
    stage.num_stats = 1;
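In anv, both special cases become plain range manipulation:
assume_full_subgroups collapses min/max to BRW_SUBGROUP_SIZE when the
workgroup size is a multiple of it, and the cooperative-matrix case
collapses a still-open range (max > min) to its maximum. A condensed
sketch of the resulting behavior, with a stand-in struct and
BRW_SUBGROUP_SIZE assumed to be 32:

   #include <assert.h>
   #include <stdbool.h>

   #define SUBGROUP_SIZE 32u  /* stand-in for BRW_SUBGROUP_SIZE */

   struct subgroup_info {
      unsigned api_subgroup_size;
      unsigned min_subgroup_size;
      unsigned max_subgroup_size;
      bool uses_wide_subgroup_intrinsics;
      bool has_cooperative_matrix;
   };

   /* Condensed sketch of anv_fixup_subgroup_size() after this patch. */
   static void fixup_subgroup_size(bool assume_full_subgroups,
                                   unsigned local_size,
                                   struct subgroup_info *info)
   {
      /* Trust the app to have sized its workgroup for full subgroups. */
      if (assume_full_subgroups &&
          info->uses_wide_subgroup_intrinsics &&
          info->api_subgroup_size == SUBGROUP_SIZE &&
          local_size && local_size % SUBGROUP_SIZE == 0) {
         info->min_subgroup_size = SUBGROUP_SIZE;
         info->max_subgroup_size = SUBGROUP_SIZE;
      }

      /* Cooperative matrix needs every invocation in a subgroup active:
       * collapse any still-open range to its maximum. */
      if (info->has_cooperative_matrix &&
          info->max_subgroup_size > info->min_subgroup_size) {
         info->api_subgroup_size = info->max_subgroup_size;
         info->min_subgroup_size = info->max_subgroup_size;
      }
   }

   int main(void)
   {
      struct subgroup_info info = {
         .api_subgroup_size = SUBGROUP_SIZE,
         .min_subgroup_size = 8, .max_subgroup_size = 32,
         .uses_wide_subgroup_intrinsics = true,
      };
      fixup_subgroup_size(true, 64, &info);  /* 64 % 32 == 0 */
      assert(info.min_subgroup_size == SUBGROUP_SIZE &&
             info.max_subgroup_size == SUBGROUP_SIZE);
      return 0;
   }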