From d95076e581a6e07ea37d2ccfeb1696c81373812f Mon Sep 17 00:00:00 2001 From: Jose Maria Casanova Crespo Date: Tue, 28 Apr 2026 13:08:15 +0200 Subject: [PATCH] v3dv: lower oversized compute workgroups to 256 invocations V3D advertises maxComputeWorkGroupInvocations = 256 but ggml-vulkan in many cases ignores this limit an creates compute pipelines with over this limit. Although this is a bug in the application we can take advantage of nir_lower_workgroup_size and make the application work. This issue was causing an assertion failure at nir_to_vir.c: assert(c->local_invocation_index_bits <= 8); The solution is lowering the oversized workgroups to a 256-invocation workgroup loop, like radv and radeonsi are doing on GFX7, by running nir_lower_workgroup_size(256) for this scenario. Reviewed-by: Iago Toral Quiroga Part-of: --- src/broadcom/vulkan/v3dv_pipeline.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 99f0e17e26c..ac7a17d3cbc 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -3143,6 +3143,26 @@ lower_compute(struct nir_shader *nir) NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); + /* V3D can't execute workgroups with more than 256 invocations + * (maxComputeWorkGroupInvocations). If the shader requested a + * larger workgroup, serialize it into a 256-invocation one. + */ + const uint32_t wg_size = nir->info.workgroup_size[0] * + nir->info.workgroup_size[1] * + nir->info.workgroup_size[2]; + if (wg_size > V3D_MAX_CSD_WG_SIZE) { + perf_debug("Compute shader requested workgroup size %u (>256); " + "lowering to a 256-invocation workgroup wrapping an " + "outer loop (workgroup_size=(%u,%u,%u)).\n", + wg_size, + nir->info.workgroup_size[0], + nir->info.workgroup_size[1], + nir->info.workgroup_size[2]); + NIR_PASS(_, nir, nir_lower_workgroup_size, V3D_MAX_CSD_WG_SIZE); + v3d_optimize_nir(NULL, nir); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + } + struct nir_lower_compute_system_values_options sysval_options = { .has_base_workgroup_id = true, };