panvk/csf: take merged workgroups into account for divergence

Merging workgroups affects divergence analysis, since subgroups can now contain extra threads from other workgroups. We already have divergence analysis flags to handle this case, but since the compiler options memory is static, we need to define an entirely separate option set for merged vs non-merged workgroups. In gallium, we don't have to switch options because opengl requires uniformity over the entire dispatch in application shaders. Signed-off-by: Olivia Lee <olivia.lee@collabora.com> Reviewed-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38586>
2026-05-06 20:18:12 +02:00 · 2025-12-09 17:08:35 -08:00 · 2025-12-09 17:08:35 -08:00 · e9ca69b807
commit e9ca69b807
parent c42e124a66
10 changed files with 39 additions and 21 deletions
--- a/src/gallium/drivers/panfrost/pan_fb_preload.c
+++ b/src/gallium/drivers/panfrost/pan_fb_preload.c
@ -439,9 +439,10 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache,
                  key->surfaces[i].samples);
   }

+   const nir_shader_compiler_options *compiler_options =
+      pan_get_nir_shader_compiler_options(PAN_ARCH, false);
   nir_builder b = nir_builder_init_simple_shader(
-      MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
-      "pan_preload(%s)", sig);
+      MESA_SHADER_FRAGMENT, compiler_options, "pan_preload(%s)", sig);

   nir_def *barycentric = nir_load_barycentric(
      &b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH);
--- a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c
+++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c
@ -183,8 +183,10 @@ panfrost_create_afbc_size_shader(struct panfrost_screen *screen,
   unsigned align = key->afbc.align;
   struct panfrost_device *dev = pan_device(&screen->base);

+   const nir_shader_compiler_options *compiler_options =
+      pan_get_nir_shader_compiler_options(dev->arch, false);
   nir_builder b = nir_builder_init_simple_shader(
-      MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch),
+      MESA_SHADER_COMPUTE, compiler_options,
      "panfrost_afbc_size(uncompressed_size=%u, align=%u)",
      key->afbc.uncompressed_size, align);

@ -220,9 +222,10 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
 {
   unsigned align = key->afbc.align;
   struct panfrost_device *dev = pan_device(&screen->base);
+   const nir_shader_compiler_options *compiler_options =
+      pan_get_nir_shader_compiler_options(dev->arch, false);
   nir_builder b = nir_builder_init_simple_shader(
-      MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch),
-      "panfrost_afbc_pack");
+      MESA_SHADER_COMPUTE, compiler_options, "panfrost_afbc_pack");

   panfrost_afbc_add_info_ubo(pack, b);

@ -265,9 +268,10 @@ panfrost_create_mtk_tiled_detile_shader(
 {
   const struct panfrost_device *device = &screen->dev;
   bool tint_yuv = (device->debug & PAN_DBG_YUV) != 0;
+   const nir_shader_compiler_options *compiler_options =
+      pan_get_nir_shader_compiler_options(device->arch, false);
   nir_builder b = nir_builder_init_simple_shader(
-      MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(device->arch),
-      "panfrost_mtk_detile");
+      MESA_SHADER_COMPUTE, compiler_options, "panfrost_mtk_detile");
   b.shader->info.workgroup_size[0] = 4;
   b.shader->info.workgroup_size[1] = 16;
   b.shader->info.workgroup_size[2] = 1;
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@ -1148,7 +1148,7 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,

   for (unsigned i = 0; i <= MESA_SHADER_COMPUTE; i++)
      screen->base.nir_options[i] =
-         pan_get_nir_shader_compiler_options(dev->arch);
+         pan_get_nir_shader_compiler_options(dev->arch, false);

   switch (dev->arch) {
   case 4:
--- a/src/panfrost/compiler/bifrost/bifrost_compile.h
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.h
@ -78,8 +78,8 @@ bool valhall_can_merge_workgroups(nir_shader *nir);
 #define VALHAL_EX_FIFO_VARYING_BITS \
   (VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_ID)

-#define DEFINE_OPTIONS(arch)                                                   \
-   static const nir_shader_compiler_options bifrost_nir_options_v##arch = {    \
+#define DEFINE_OPTIONS(name, arch, merge_workgroups)                           \
+   static const nir_shader_compiler_options name = {                           \
      .lower_scmp = true,                                                      \
      .lower_flrp16 = true,                                                    \
      .lower_flrp32 = true,                                                    \
@ -149,10 +149,17 @@ bool valhall_can_merge_workgroups(nir_shader *nir);
      .has_udot_4x8_sat = arch >= 9,                                           \
      .has_sdot_4x8 = arch >= 9,                                               \
      .has_sdot_4x8_sat = arch >= 9,                                           \
+                                                                               \
+      .divergence_analysis_options = merge_workgroups ?                        \
+         (nir_divergence_across_subgroups |                                    \
+          nir_divergence_multiple_workgroup_per_compute_subgroup)              \
+         : 0,                                                                  \
   };

-DEFINE_OPTIONS(6);
-DEFINE_OPTIONS(9);
-DEFINE_OPTIONS(11);
+DEFINE_OPTIONS(bifrost_nir_options_v6, 6, false);
+DEFINE_OPTIONS(bifrost_nir_options_v9, 9, false);
+DEFINE_OPTIONS(bifrost_nir_options_v9_merge_wg, 9, true);
+DEFINE_OPTIONS(bifrost_nir_options_v11, 11, false);
+DEFINE_OPTIONS(bifrost_nir_options_v11_merge_wg, 11, true);

 #endif
--- a/src/panfrost/compiler/pan_compiler.c
+++ b/src/panfrost/compiler/pan_compiler.c
@ -34,22 +34,26 @@ pan_want_debug_info(unsigned arch)
 }

 const nir_shader_compiler_options *
-pan_get_nir_shader_compiler_options(unsigned arch)
+pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg)
 {
   switch (arch) {
   case 4:
   case 5:
+      assert(!merge_wg);
      return &midgard_nir_options;
   case 6:
   case 7:
+      assert(!merge_wg);
      return &bifrost_nir_options_v6;
   case 9:
   case 10:
-      return &bifrost_nir_options_v9;
+      return merge_wg ? &bifrost_nir_options_v9_merge_wg :
+                        &bifrost_nir_options_v9;
   case 11:
   case 12:
   case 13:
-      return &bifrost_nir_options_v11;
+      return merge_wg ? &bifrost_nir_options_v11_merge_wg :
+                        &bifrost_nir_options_v11;
   default:
      assert(!"Unsupported arch");
      return NULL;
--- a/src/panfrost/compiler/pan_compiler.h
+++ b/src/panfrost/compiler/pan_compiler.h
@ -21,7 +21,7 @@ bool pan_will_dump_shaders(unsigned arch);
 bool pan_want_debug_info(unsigned arch);

 const nir_shader_compiler_options *
-pan_get_nir_shader_compiler_options(unsigned arch);
+pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg);

 void pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id);
 void pan_optimize_nir(nir_shader *nir, uint64_t gpu_id);
--- a/src/panfrost/lib/pan_blend.c
+++ b/src/panfrost/lib/pan_blend.c
@ -804,8 +804,10 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state,

   get_equation_str(rt_state, equation_str, sizeof(equation_str));

+   const nir_shader_compiler_options *compiler_options =
+      pan_get_nir_shader_compiler_options(PAN_ARCH, false);
   nir_builder builder = nir_builder_init_simple_shader(
-      MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
+      MESA_SHADER_FRAGMENT, compiler_options,
      "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
      util_format_name(rt_state->format), rt_state->nr_samples,
      state->logicop_enable ? "logicop" : "equation",
--- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c
+++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c
@ -332,7 +332,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE,
      pan_get_nir_shader_compiler_options(
-         pan_arch(phys_dev->kmod.dev->props.gpu_id)),
+         pan_arch(phys_dev->kmod.dev->props.gpu_id), false),
      "%s", "desc_copy");

   /* We actually customize that at execution time to issue the
--- a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c
@ -206,7 +206,7 @@ get_frame_shader(struct panvk_device *dev,
      goto out;

   const struct nir_shader_compiler_options *nir_options =
-      pan_get_nir_shader_compiler_options(PAN_ARCH);
+      pan_get_nir_shader_compiler_options(PAN_ARCH, false);
   nir_shader *nir = GENX(pan_get_fb_shader)(&key->key, nir_options);

   NIR_PASS(_, nir, nir_shader_instructions_pass, lower_instr,
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@ -345,7 +345,7 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev,
 {
   struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev);
   return pan_get_nir_shader_compiler_options(
-      pan_arch(phys_dev->kmod.dev->props.gpu_id));
+      pan_arch(phys_dev->kmod.dev->props.gpu_id), false);
 }

 static struct spirv_to_nir_options