pan/va: move allow_merging_workgroups decision to drivers

In panvk, we will need to decide whether we are merging workgroups early in shader compilation, before calling nir_lower_non_uniform_access. This is because non-uniform lowering introduces new subgroup intrinsics, which would otherwise inhibit workgroup merging, and because the set of instructions that need to be lowered may be different with merged workgroups.

Signed-off-by: Olivia Lee <olivia.lee@collabora.com>
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38586>
2026-05-07 07:08:04 +02:00 · 2025-12-09 17:33:15 -08:00 · 2025-12-09 17:33:15 -08:00 · a0f6c6d84d
commit a0f6c6d84d
parent 1f75299ebb
5 changed files with 20 additions and 5 deletions
--- a/src/gallium/drivers/panfrost/pan_shader.c
+++ b/src/gallium/drivers/panfrost/pan_shader.c
@@ -18,6 +18,7 @@
 #include "pan_compiler.h"
 #include "pan_nir.h"
 #include "pan_trace.h"
+#include "compiler/bifrost/bifrost_compile.h"
 #include "shader_enums.h"

 static struct panfrost_uncompiled_shader *
@@ -179,6 +180,10 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
               key->vs.noperspective_varyings);
   }

+   if (dev->arch >= 9 && mesa_shader_stage_is_compute(s->info.stage)) {
+      out->info.cs.allow_merging_workgroups = valhall_can_merge_workgroups(s);
+   }
+
   NIR_PASS(_, s, panfrost_nir_lower_sysvals, dev->arch, &out->sysvals);

   /* For now, we only allow pushing the default UBO 0, and the sysval UBO (if
--- a/src/panfrost/clc/pan_compile.c
+++ b/src/panfrost/clc/pan_compile.c
@@ -439,6 +439,11 @@ main(int argc, const char **argv)
         struct util_dynarray shader_binary;
         struct pan_shader_info shader_info = {0};
         shader_binary = UTIL_DYNARRAY_INIT;
+
+         if (target_arch >= 9)
+            shader_info.cs.allow_merging_workgroups =
+               valhall_can_merge_workgroups(s);
+
         pan_shader_compile(clone, &inputs, &shader_binary, &shader_info);

         assert(shader_info.push.count * 4 <=
--- a/src/panfrost/compiler/bifrost/bifrost_compile.h
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.h
@@ -73,6 +73,8 @@ void bifrost_compile_shader_nir(nir_shader *nir,
                                struct util_dynarray *binary,
                                struct pan_shader_info *info);

+bool valhall_can_merge_workgroups(nir_shader *nir);
+
 #define VALHAL_EX_FIFO_VARYING_BITS \
   (VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_ID)

--- a/src/panfrost/compiler/bifrost/bifrost_nir.c
+++ b/src/panfrost/compiler/bifrost/bifrost_nir.c
@@ -579,8 +579,8 @@ bi_lower_subgroups(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 * visible. This is true if neither shared memory nor BARRIER instructions are
 * used. The hardware may be able to optimize compute shaders that set this
 * flag. */
-static bool
-bi_can_merge_workgroups(nir_shader *nir)
+bool
+valhall_can_merge_workgroups(nir_shader *nir)
 {
   if (nir->info.shared_size != 0)
      return false;
@@ -1152,8 +1152,5 @@ bifrost_compile_shader_nir(nir_shader *nir,
      bi_compile_variant(nir, inputs, binary, info, BI_IDVS_NONE);
   }

-   if (mesa_shader_stage_is_compute(nir->info.stage))
-      info->cs.allow_merging_workgroups = bi_can_merge_workgroups(nir);
-
   info->ubo_mask &= (1 << nir->info.num_ubos) - 1;
 }
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -38,6 +38,7 @@
 #include "vk_ycbcr_conversion.h"

 #include "compiler/bifrost/bifrost_nir.h"
+#include "compiler/bifrost/bifrost_compile.h"
 #include "compiler/pan_compiler.h"
 #include "compiler/pan_nir.h"
 #include "pan_shader.h"
@@ -1431,6 +1432,11 @@ panvk_compile_shader(struct panvk_device *dev,

      nir_shader *nir = info->nir;

+#if PAN_ARCH >= 9
+      variant->info.cs.allow_merging_workgroups =
+         valhall_can_merge_workgroups(nir);
+#endif
+
      panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
                      info->robustness, state, &variant->desc_info);