diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ea8e6e54672..2d2ff03ed72 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -6148,6 +6148,10 @@ typedef struct nir_lower_subgroups_options {
     */
    nir_instr_filter_cb filter;
 
+   /* In case the exact subgroup size is not known, subgroup_size should be
+    * set to 0. In that case, the maximum subgroup size will be calculated by
+    * ballot_components * ballot_bit_size.
+    */
    uint8_t subgroup_size;
    uint8_t ballot_bit_size;
    uint8_t ballot_components;
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index 20b486c2956..896cf11da36 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -30,6 +30,18 @@
  * \file nir_opt_intrinsics.c
  */
 
+/* Returns the subgroup size the lowering should assume: the value of
+ * options->subgroup_size when it is non-zero, otherwise the upper bound
+ * options->ballot_components * options->ballot_bit_size.
+ */
+static unsigned
+get_max_subgroup_size(const nir_lower_subgroups_options *options)
+{
+   return options->subgroup_size
+             ? options->subgroup_size
+             : options->ballot_components * options->ballot_bit_size;
+}
+
 static nir_intrinsic_instr *
 lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                                       unsigned int component)
@@ -494,8 +506,9 @@ lower_boolean_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
    case nir_intrinsic_rotate: {
       nir_def *delta = nir_as_uniform(b, intrin->src[1].ssa);
       uint32_t cluster_size = nir_intrinsic_cluster_size(intrin);
-      cluster_size = cluster_size ? cluster_size : options->subgroup_size;
-      cluster_size = MIN2(cluster_size, options->subgroup_size);
+      unsigned subgroup_size = get_max_subgroup_size(options);
+      cluster_size = cluster_size ? cluster_size : subgroup_size;
+      cluster_size = MIN2(cluster_size, subgroup_size);
       if (cluster_size == 1) {
          return intrin->src[0].ssa;
       } else if (cluster_size == 2) {
@@ -845,7 +858,7 @@ lower_scan_reduce(nir_builder *b, nir_intrinsic_instr *intrin,
                   const nir_lower_subgroups_options *options)
 {
    const nir_op red_op = nir_intrinsic_reduction_op(intrin);
-   unsigned subgroup_size = options->subgroup_size;
+   unsigned subgroup_size = get_max_subgroup_size(options);
 
    /* Grab the cluster size */
    unsigned cluster_size = subgroup_size;