diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c index 96dc7e95408..b934c3719ce 100644 --- a/src/gallium/drivers/panfrost/pan_fb_preload.c +++ b/src/gallium/drivers/panfrost/pan_fb_preload.c @@ -439,9 +439,10 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache, key->surfaces[i].samples); } + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(PAN_ARCH, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH), - "pan_preload(%s)", sig); + MESA_SHADER_FRAGMENT, compiler_options, "pan_preload(%s)", sig); nir_def *barycentric = nir_load_barycentric( &b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH); diff --git a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c index 206badd4945..03aa4009554 100644 --- a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c @@ -183,8 +183,10 @@ panfrost_create_afbc_size_shader(struct panfrost_screen *screen, unsigned align = key->afbc.align; struct panfrost_device *dev = pan_device(&screen->base); + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(dev->arch, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch), + MESA_SHADER_COMPUTE, compiler_options, "panfrost_afbc_size(uncompressed_size=%u, align=%u)", key->afbc.uncompressed_size, align); @@ -220,9 +222,10 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen, { unsigned align = key->afbc.align; struct panfrost_device *dev = pan_device(&screen->base); + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(dev->arch, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch), - "panfrost_afbc_pack"); + MESA_SHADER_COMPUTE, compiler_options, "panfrost_afbc_pack"); panfrost_afbc_add_info_ubo(pack, b); @@ -265,9 +268,10 @@ panfrost_create_mtk_tiled_detile_shader( { const struct panfrost_device *device = &screen->dev; bool tint_yuv = (device->debug & PAN_DBG_YUV) != 0; + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(device->arch, false); nir_builder b = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(device->arch), - "panfrost_mtk_detile"); + MESA_SHADER_COMPUTE, compiler_options, "panfrost_mtk_detile"); b.shader->info.workgroup_size[0] = 4; b.shader->info.workgroup_size[1] = 16; b.shader->info.workgroup_size[2] = 1; diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 10912b6f65f..9b17330c5bf 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -1148,7 +1148,7 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config, for (unsigned i = 0; i <= MESA_SHADER_COMPUTE; i++) screen->base.nir_options[i] = - pan_get_nir_shader_compiler_options(dev->arch); + pan_get_nir_shader_compiler_options(dev->arch, false); switch (dev->arch) { case 4: diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.h b/src/panfrost/compiler/bifrost/bifrost_compile.h index 5b5b8ff55d4..70f6221d9b7 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost/bifrost_compile.h @@ -78,8 +78,8 @@ bool valhall_can_merge_workgroups(nir_shader *nir); #define VALHAL_EX_FIFO_VARYING_BITS \ (VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_ID) -#define DEFINE_OPTIONS(arch) \ - static const nir_shader_compiler_options bifrost_nir_options_v##arch = { \ +#define DEFINE_OPTIONS(name, arch, merge_workgroups) \ + static const nir_shader_compiler_options name = { \ .lower_scmp = true, \ .lower_flrp16 = true, \ .lower_flrp32 = true, \ @@ -149,10 +149,17 @@ bool valhall_can_merge_workgroups(nir_shader *nir); .has_udot_4x8_sat = arch >= 9, \ .has_sdot_4x8 = arch >= 9, \ .has_sdot_4x8_sat = arch >= 9, \ + \ + .divergence_analysis_options = merge_workgroups ? \ + (nir_divergence_across_subgroups | \ + nir_divergence_multiple_workgroup_per_compute_subgroup) \ + : 0, \ }; -DEFINE_OPTIONS(6); -DEFINE_OPTIONS(9); -DEFINE_OPTIONS(11); +DEFINE_OPTIONS(bifrost_nir_options_v6, 6, false); +DEFINE_OPTIONS(bifrost_nir_options_v9, 9, false); +DEFINE_OPTIONS(bifrost_nir_options_v9_merge_wg, 9, true); +DEFINE_OPTIONS(bifrost_nir_options_v11, 11, false); +DEFINE_OPTIONS(bifrost_nir_options_v11_merge_wg, 11, true); #endif diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c index 9b65f3ee9ec..ce81a13e34f 100644 --- a/src/panfrost/compiler/pan_compiler.c +++ b/src/panfrost/compiler/pan_compiler.c @@ -34,22 +34,26 @@ pan_want_debug_info(unsigned arch) } const nir_shader_compiler_options * -pan_get_nir_shader_compiler_options(unsigned arch) +pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg) { switch (arch) { case 4: case 5: + assert(!merge_wg); return &midgard_nir_options; case 6: case 7: + assert(!merge_wg); return &bifrost_nir_options_v6; case 9: case 10: - return &bifrost_nir_options_v9; + return merge_wg ? &bifrost_nir_options_v9_merge_wg : + &bifrost_nir_options_v9; case 11: case 12: case 13: - return &bifrost_nir_options_v11; + return merge_wg ? &bifrost_nir_options_v11_merge_wg : + &bifrost_nir_options_v11; default: assert(!"Unsupported arch"); return NULL; diff --git a/src/panfrost/compiler/pan_compiler.h b/src/panfrost/compiler/pan_compiler.h index 57b09691970..a687b247161 100644 --- a/src/panfrost/compiler/pan_compiler.h +++ b/src/panfrost/compiler/pan_compiler.h @@ -21,7 +21,7 @@ bool pan_will_dump_shaders(unsigned arch); bool pan_want_debug_info(unsigned arch); const nir_shader_compiler_options * -pan_get_nir_shader_compiler_options(unsigned arch); +pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg); void pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id); void pan_optimize_nir(nir_shader *nir, uint64_t gpu_id); diff --git a/src/panfrost/lib/pan_blend.c b/src/panfrost/lib/pan_blend.c index 96f7484f8d0..c9652ace645 100644 --- a/src/panfrost/lib/pan_blend.c +++ b/src/panfrost/lib/pan_blend.c @@ -804,8 +804,10 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state, get_equation_str(rt_state, equation_str, sizeof(equation_str)); + const nir_shader_compiler_options *compiler_options = + pan_get_nir_shader_compiler_options(PAN_ARCH, false); nir_builder builder = nir_builder_init_simple_shader( - MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH), + MESA_SHADER_FRAGMENT, compiler_options, "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt, util_format_name(rt_state->format), rt_state->nr_samples, state->logicop_enable ? "logicop" : "equation", diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index d0f0c153e45..01837e44d47 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -332,7 +332,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev) nir_builder b = nir_builder_init_simple_shader( MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options( - pan_arch(phys_dev->kmod.dev->props.gpu_id)), + pan_arch(phys_dev->kmod.dev->props.gpu_id), false), "%s", "desc_copy"); /* We actually customize that at execution time to issue the diff --git a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c index 903b6762a5a..196614c3fe6 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c @@ -206,7 +206,7 @@ get_frame_shader(struct panvk_device *dev, goto out; const struct nir_shader_compiler_options *nir_options = - pan_get_nir_shader_compiler_options(PAN_ARCH); + pan_get_nir_shader_compiler_options(PAN_ARCH, false); nir_shader *nir = GENX(pan_get_fb_shader)(&key->key, nir_options); NIR_PASS(_, nir, nir_shader_instructions_pass, lower_instr, diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index ac2e62a2d9f..241dfaca870 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -345,7 +345,7 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev, { struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev); return pan_get_nir_shader_compiler_options( - pan_arch(phys_dev->kmod.dev->props.gpu_id)); + pan_arch(phys_dev->kmod.dev->props.gpu_id), false); } static struct spirv_to_nir_options