From cdef2c0b61d9d1a1b504374bab5c9b32b6f8619c Mon Sep 17 00:00:00 2001 From: Daivik Bhatia Date: Wed, 24 Sep 2025 22:38:30 +0530 Subject: [PATCH] broadcom/common: Add subgroup support to CSD super-group packing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain subgroup operations don’t impose constraints on CSD supergroup packing. Mark these as supported and account for them in v3d_csd_choose_workgroups_per_supergroup() so packing remains unchanged when they are present. Reviewed-by: Iago Toral Quiroga Part-of: --- src/broadcom/common/v3d_util.c | 9 +++++---- src/broadcom/common/v3d_util.h | 2 +- src/broadcom/compiler/v3d_compiler.h | 4 ++-- src/broadcom/compiler/vir.c | 11 +++++------ src/broadcom/vulkan/v3dv_cmd_buffer.c | 2 +- src/gallium/drivers/v3d/v3dx_draw.c | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c index e69c7ba2354..65233dabfc4 100644 --- a/src/broadcom/common/v3d_util.c +++ b/src/broadcom/common/v3d_util.c @@ -29,16 +29,17 @@ */ uint32_t v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, - bool has_subgroups, + bool can_use_supergroups, bool has_tsy_barrier, uint32_t threads, uint32_t num_wgs, uint32_t wg_size) { - /* FIXME: subgroups may restrict supergroup packing. For now, we disable it - * completely if the shader uses subgroups. + /* FIXME: Some subgroups may restrict supergroup packing. For now, + * if the shader has subgroups, we only allow the ones that support + * supergroup packing. */ - if (has_subgroups) + if (!can_use_supergroups) return 1; /* If the workgroup size is a multiple of 16 (elements per batch), diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h index b33706cea29..32856a2335f 100644 --- a/src/broadcom/common/v3d_util.h +++ b/src/broadcom/common/v3d_util.h @@ -53,7 +53,7 @@ uint32_t v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, - bool has_subgroups, + bool can_use_supergroups, bool has_tsy_barrier, uint32_t threads, uint32_t num_wgs, diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 7d2914e553d..52c23aa5775 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -831,7 +831,7 @@ struct v3d_compile { struct qreg start_msf; /* If the shader uses subgroup functionality */ - bool has_subgroups; + bool can_use_supergroups; uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; uint32_t vpm_output_size; @@ -1096,7 +1096,7 @@ struct v3d_compute_prog_data { uint32_t shared_size; uint16_t local_size[3]; /* If the shader uses subgroup functionality */ - bool has_subgroups; + bool can_use_supergroups; }; struct vpm_config { diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index a4dcabef5a1..39d3f5e454b 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -958,7 +958,7 @@ v3d_cs_set_prog_data(struct v3d_compile *c, prog_data->local_size[1] = c->s->info.workgroup_size[1]; prog_data->local_size[2] = c->s->info.workgroup_size[2]; - prog_data->has_subgroups = c->has_subgroups; + prog_data->can_use_supergroups = c->can_use_supergroups; } static void @@ -1679,7 +1679,7 @@ lower_subgroup_intrinsics(struct v3d_compile *c, bool progress = false; nir_foreach_instr_safe(inst, block) { if (inst->type != nir_instr_type_intrinsic) - continue;; + continue; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(inst); @@ -1690,9 +1690,7 @@ lower_subgroup_intrinsics(struct v3d_compile *c, case nir_intrinsic_load_num_subgroups: lower_load_num_subgroups(c, b, intr); progress = true; - FALLTHROUGH; - case nir_intrinsic_load_subgroup_id: - case nir_intrinsic_load_subgroup_size: + break; case nir_intrinsic_load_subgroup_invocation: case nir_intrinsic_elect: case nir_intrinsic_ballot: @@ -1725,7 +1723,7 @@ lower_subgroup_intrinsics(struct v3d_compile *c, case nir_intrinsic_quad_swap_horizontal: case nir_intrinsic_quad_swap_vertical: case nir_intrinsic_quad_swap_diagonal: - c->has_subgroups = true; + c->can_use_supergroups = false; break; default: break; @@ -1739,6 +1737,7 @@ static bool v3d_nir_lower_subgroup_intrinsics(nir_shader *s, struct v3d_compile *c) { bool progress = false; + c->can_use_supergroups = true; nir_foreach_function_impl(impl, s) { nir_builder b = nir_builder_create(impl); diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 26c9d49085e..f33c99f0f1d 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -4322,7 +4322,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, uint32_t wgs_per_sg = v3d_csd_choose_workgroups_per_supergroup( &cmd_buffer->device->devinfo, - cs_variant->prog_data.cs->has_subgroups, + cs_variant->prog_data.cs->can_use_supergroups, cs_variant->prog_data.cs->base.has_control_barrier, cs_variant->prog_data.cs->base.threads, num_wgs, wg_size); diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 402977c5e0e..aabcd2f0121 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -1501,7 +1501,7 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) uint32_t wgs_per_sg = v3d_csd_choose_workgroups_per_supergroup( &v3d->screen->devinfo, - compute->has_subgroups, + compute->can_use_supergroups, compute->base.has_control_barrier, compute->base.threads, num_wgs, wg_size);