diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c index ec549d48584..424656fd8b1 100644 --- a/src/broadcom/common/v3d_util.c +++ b/src/broadcom/common/v3d_util.c @@ -29,11 +29,18 @@ */ uint32_t v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, + bool has_subgroups, bool has_tsy_barrier, uint32_t threads, uint32_t num_wgs, uint32_t wg_size) { + /* FIXME: subgroups may restrict supergroup packing. For now, we disable it + * completely if the shader uses subgroups. + */ + if (has_subgroups) + return 1; + /* Compute maximum number of batches in a supergroup for this workgroup size. * Each batch is 16 elements, and we can have up to 16 work groups in a * supergroup: diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h index fc304f55958..b9804f235ae 100644 --- a/src/broadcom/common/v3d_util.h +++ b/src/broadcom/common/v3d_util.h @@ -28,6 +28,7 @@ uint32_t v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, + bool has_subgroups, bool has_tsy_barrier, uint32_t threads, uint32_t num_wgs, diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 0e4b124cb25..72092b2c47e 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -3257,6 +3257,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, uint32_t wgs_per_sg = v3d_csd_choose_workgroups_per_supergroup( &cmd_buffer->device->devinfo, + cs_variant->prog_data.cs->has_subgroups, cs_variant->prog_data.cs->base.has_control_barrier, cs_variant->prog_data.cs->base.threads, num_wgs, wg_size); diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index cdf9d5d0878..91a1048a8b9 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -1556,6 +1556,7 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) uint32_t wgs_per_sg = v3d_csd_choose_workgroups_per_supergroup( &v3d->screen->devinfo, + compute->has_subgroups, compute->base.has_control_barrier, compute->base.threads, num_wgs, wg_size);