From e9ca69b8072e34529196fadeb787f3fcd111aa81 Mon Sep 17 00:00:00 2001 From: Olivia Lee Date: Tue, 9 Dec 2025 17:08:35 -0800 Subject: [PATCH] panvk/csf: take merged workgroups into account for divergence Merging workgroups affects divergence analysis, since subgroups can now contain extra threads from other workgroups. We already have divergence analysis flags to handle this case, but since the compiler options memory is static, we need to define an entirely separate option set for merged vs non-merged workgroups. In gallium, we don't have to switch options because opengl requires uniformity over the entire dispatch in application shaders. Signed-off-by: Olivia Lee Reviewed-by: Eric R. Smith Part-of: --- src/gallium/drivers/panfrost/pan_fb_preload.c | 5 +++-- src/gallium/drivers/panfrost/pan_mod_conv_cso.c | 14 +++++++++----- src/gallium/drivers/panfrost/pan_screen.c | 2 +- src/panfrost/compiler/bifrost/bifrost_compile.h | 17 ++++++++++++----- src/panfrost/compiler/pan_compiler.c | 10 +++++++--- src/panfrost/compiler/pan_compiler.h | 2 +- src/panfrost/lib/pan_blend.c | 4 +++- .../vulkan/bifrost/panvk_vX_meta_desc_copy.c | 2 +- .../vulkan/panvk_vX_cmd_frame_shaders.c | 2 +- src/panfrost/vulkan/panvk_vX_shader.c | 2 +- 10 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c index 96dc7e95408..b934c3719ce 100644 --- a/src/gallium/drivers/panfrost/pan_fb_preload.c +++ b/src/gallium/drivers/panfrost/pan_fb_preload.c @@ -439,9 +439,10 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache, key->surfaces[i].samples); } + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(PAN_ARCH, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH), - "pan_preload(%s)", sig); + MESA_SHADER_FRAGMENT, compiler_options, "pan_preload(%s)", sig); nir_def *barycentric = nir_load_barycentric( &b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH); diff --git a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c index 206badd4945..03aa4009554 100644 --- a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c @@ -183,8 +183,10 @@ panfrost_create_afbc_size_shader(struct panfrost_screen *screen, unsigned align = key->afbc.align; struct panfrost_device *dev = pan_device(&screen->base); + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(dev->arch, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch), + MESA_SHADER_COMPUTE, compiler_options, "panfrost_afbc_size(uncompressed_size=%u, align=%u)", key->afbc.uncompressed_size, align); @@ -220,9 +222,10 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen, { unsigned align = key->afbc.align; struct panfrost_device *dev = pan_device(&screen->base); + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(dev->arch, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch), - "panfrost_afbc_pack"); + MESA_SHADER_COMPUTE, compiler_options, "panfrost_afbc_pack"); panfrost_afbc_add_info_ubo(pack, b); @@ -265,9 +268,10 @@ panfrost_create_mtk_tiled_detile_shader( { const struct panfrost_device *device = &screen->dev; bool tint_yuv = (device->debug & PAN_DBG_YUV) != 0; + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(device->arch, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(device->arch), - "panfrost_mtk_detile"); + MESA_SHADER_COMPUTE, compiler_options, "panfrost_mtk_detile"); b.shader->info.workgroup_size[0] = 4; b.shader->info.workgroup_size[1] = 16; b.shader->info.workgroup_size[2] = 1; diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 10912b6f65f..9b17330c5bf 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -1148,7 +1148,7 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config, for (unsigned i = 0; i <= MESA_SHADER_COMPUTE; i++) screen->base.nir_options[i] = - pan_get_nir_shader_compiler_options(dev->arch); + pan_get_nir_shader_compiler_options(dev->arch, false); switch (dev->arch) { case 4: diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.h b/src/panfrost/compiler/bifrost/bifrost_compile.h index 5b5b8ff55d4..70f6221d9b7 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost/bifrost_compile.h @@ -78,8 +78,8 @@ bool valhall_can_merge_workgroups(nir_shader *nir); #define VALHAL_EX_FIFO_VARYING_BITS \ (VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_ID) -#define DEFINE_OPTIONS(arch) \ - static const nir_shader_compiler_options bifrost_nir_options_v##arch = { \ +#define DEFINE_OPTIONS(name, arch, merge_workgroups) \ + static const nir_shader_compiler_options name = { \ .lower_scmp = true, \ .lower_flrp16 = true, \ .lower_flrp32 = true, \ @@ -149,10 +149,17 @@ bool valhall_can_merge_workgroups(nir_shader *nir); .has_udot_4x8_sat = arch >= 9, \ .has_sdot_4x8 = arch >= 9, \ .has_sdot_4x8_sat = arch >= 9, \ + \ + .divergence_analysis_options = merge_workgroups ? \ + (nir_divergence_across_subgroups | \ + nir_divergence_multiple_workgroup_per_compute_subgroup) \ + : 0, \ }; -DEFINE_OPTIONS(6); -DEFINE_OPTIONS(9); -DEFINE_OPTIONS(11); +DEFINE_OPTIONS(bifrost_nir_options_v6, 6, false); +DEFINE_OPTIONS(bifrost_nir_options_v9, 9, false); +DEFINE_OPTIONS(bifrost_nir_options_v9_merge_wg, 9, true); +DEFINE_OPTIONS(bifrost_nir_options_v11, 11, false); +DEFINE_OPTIONS(bifrost_nir_options_v11_merge_wg, 11, true); #endif diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c index 9b65f3ee9ec..ce81a13e34f 100644 --- a/src/panfrost/compiler/pan_compiler.c +++ b/src/panfrost/compiler/pan_compiler.c @@ -34,22 +34,26 @@ pan_want_debug_info(unsigned arch) } const nir_shader_compiler_options * -pan_get_nir_shader_compiler_options(unsigned arch) +pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg) { switch (arch) { case 4: case 5: + assert(!merge_wg); return &midgard_nir_options; case 6: case 7: + assert(!merge_wg); return &bifrost_nir_options_v6; case 9: case 10: - return &bifrost_nir_options_v9; + return merge_wg ? &bifrost_nir_options_v9_merge_wg : + &bifrost_nir_options_v9; case 11: case 12: case 13: - return &bifrost_nir_options_v11; + return merge_wg ? &bifrost_nir_options_v11_merge_wg : + &bifrost_nir_options_v11; default: assert(!"Unsupported arch"); return NULL; diff --git a/src/panfrost/compiler/pan_compiler.h b/src/panfrost/compiler/pan_compiler.h index 57b09691970..a687b247161 100644 --- a/src/panfrost/compiler/pan_compiler.h +++ b/src/panfrost/compiler/pan_compiler.h @@ -21,7 +21,7 @@ bool pan_will_dump_shaders(unsigned arch); bool pan_want_debug_info(unsigned arch); const nir_shader_compiler_options * -pan_get_nir_shader_compiler_options(unsigned arch); +pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg); void pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id); void pan_optimize_nir(nir_shader *nir, uint64_t gpu_id); diff --git a/src/panfrost/lib/pan_blend.c b/src/panfrost/lib/pan_blend.c index 96f7484f8d0..c9652ace645 100644 --- a/src/panfrost/lib/pan_blend.c +++ b/src/panfrost/lib/pan_blend.c @@ -804,8 +804,10 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state, get_equation_str(rt_state, equation_str, sizeof(equation_str)); + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(PAN_ARCH, false); nir_builder builder = nir_builder_init_simple_shader( - MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH), + MESA_SHADER_FRAGMENT, compiler_options, "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt, util_format_name(rt_state->format), rt_state->nr_samples, state->logicop_enable ? "logicop" : "equation", diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index d0f0c153e45..01837e44d47 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -332,7 +332,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev) nir_builder b = nir_builder_init_simple_shader( MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options( - pan_arch(phys_dev->kmod.dev->props.gpu_id)), + pan_arch(phys_dev->kmod.dev->props.gpu_id), false), "%s", "desc_copy"); /* We actually customize that at execution time to issue the diff --git a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c index 903b6762a5a..196614c3fe6 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c @@ -206,7 +206,7 @@ get_frame_shader(struct panvk_device *dev, goto out; const struct nir_shader_compiler_options *nir_options = - pan_get_nir_shader_compiler_options(PAN_ARCH); + pan_get_nir_shader_compiler_options(PAN_ARCH, false); nir_shader *nir = GENX(pan_get_fb_shader)(&key->key, nir_options); NIR_PASS(_, nir, nir_shader_instructions_pass, lower_instr, diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index ac2e62a2d9f..241dfaca870 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -345,7 +345,7 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev, { struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev); return pan_get_nir_shader_compiler_options( - pan_arch(phys_dev->kmod.dev->props.gpu_id)); + pan_arch(phys_dev->kmod.dev->props.gpu_id), false); } static struct spirv_to_nir_options