broadcom/common: Add subgroup support to CSD super-group packing

Certain subgroup operations don’t impose constraints on
CSD supergroup packing. Mark these as supported
and account for them in v3d_csd_choose_workgroups_per_supergroup()
so packing remains unchanged when they are present.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37836>
This commit is contained in:
Daivik Bhatia 2025-09-24 22:38:30 +05:30 committed by Iago Toral Quiroga
parent 1326d52d23
commit cdef2c0b61
6 changed files with 15 additions and 15 deletions

View file

@ -29,16 +29,17 @@
*/
uint32_t
v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
bool has_subgroups,
bool can_use_supergroups,
bool has_tsy_barrier,
uint32_t threads,
uint32_t num_wgs,
uint32_t wg_size)
{
/* FIXME: subgroups may restrict supergroup packing. For now, we disable it
* completely if the shader uses subgroups.
/* FIXME: Some subgroups may restrict supergroup packing. For now,
* if the shader has subgroups, we only allow the ones that support
* supergroup packing.
*/
if (has_subgroups)
if (!can_use_supergroups)
return 1;
/* If the workgroup size is a multiple of 16 (elements per batch),

View file

@ -53,7 +53,7 @@
uint32_t
v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
bool has_subgroups,
bool can_use_supergroups,
bool has_tsy_barrier,
uint32_t threads,
uint32_t num_wgs,

View file

@ -831,7 +831,7 @@ struct v3d_compile {
struct qreg start_msf;
/* If the shader uses subgroup functionality */
bool has_subgroups;
bool can_use_supergroups;
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4];
uint32_t vpm_output_size;
@ -1096,7 +1096,7 @@ struct v3d_compute_prog_data {
uint32_t shared_size;
uint16_t local_size[3];
/* If the shader uses subgroup functionality */
bool has_subgroups;
bool can_use_supergroups;
};
struct vpm_config {

View file

@ -958,7 +958,7 @@ v3d_cs_set_prog_data(struct v3d_compile *c,
prog_data->local_size[1] = c->s->info.workgroup_size[1];
prog_data->local_size[2] = c->s->info.workgroup_size[2];
prog_data->has_subgroups = c->has_subgroups;
prog_data->can_use_supergroups = c->can_use_supergroups;
}
static void
@ -1679,7 +1679,7 @@ lower_subgroup_intrinsics(struct v3d_compile *c,
bool progress = false;
nir_foreach_instr_safe(inst, block) {
if (inst->type != nir_instr_type_intrinsic)
continue;;
continue;
nir_intrinsic_instr *intr =
nir_instr_as_intrinsic(inst);
@ -1690,9 +1690,7 @@ lower_subgroup_intrinsics(struct v3d_compile *c,
case nir_intrinsic_load_num_subgroups:
lower_load_num_subgroups(c, b, intr);
progress = true;
FALLTHROUGH;
case nir_intrinsic_load_subgroup_id:
case nir_intrinsic_load_subgroup_size:
break;
case nir_intrinsic_load_subgroup_invocation:
case nir_intrinsic_elect:
case nir_intrinsic_ballot:
@ -1725,7 +1723,7 @@ lower_subgroup_intrinsics(struct v3d_compile *c,
case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical:
case nir_intrinsic_quad_swap_diagonal:
c->has_subgroups = true;
c->can_use_supergroups = false;
break;
default:
break;
@ -1739,6 +1737,7 @@ static bool
v3d_nir_lower_subgroup_intrinsics(nir_shader *s, struct v3d_compile *c)
{
bool progress = false;
c->can_use_supergroups = true;
nir_foreach_function_impl(impl, s) {
nir_builder b = nir_builder_create(impl);

View file

@ -4322,7 +4322,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t wgs_per_sg =
v3d_csd_choose_workgroups_per_supergroup(
&cmd_buffer->device->devinfo,
cs_variant->prog_data.cs->has_subgroups,
cs_variant->prog_data.cs->can_use_supergroups,
cs_variant->prog_data.cs->base.has_control_barrier,
cs_variant->prog_data.cs->base.threads,
num_wgs, wg_size);

View file

@ -1501,7 +1501,7 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
uint32_t wgs_per_sg =
v3d_csd_choose_workgroups_per_supergroup(
&v3d->screen->devinfo,
compute->has_subgroups,
compute->can_use_supergroups,
compute->base.has_control_barrier,
compute->base.threads,
num_wgs, wg_size);