nir/lower_subgroups: support unknown subgroup size

Some targets (e.g., ir3) don't always know the exact subgroup size.
Calculate the maximum subgroup size in that case by multiplying
ballot_components and ballot_bit_size.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31731>
This commit is contained in:
Job Noorman 2024-11-29 17:09:15 +01:00 committed by Marge Bot
parent cee548095a
commit 60e1615ced
2 changed files with 16 additions and 3 deletions

View file

@ -6148,6 +6148,10 @@ typedef struct nir_lower_subgroups_options {
*/
nir_instr_filter_cb filter;
/* If the exact subgroup size is not known, subgroup_size should be set to
* 0. In that case, the maximum subgroup size is calculated as
* ballot_components * ballot_bit_size.
*/
uint8_t subgroup_size;
uint8_t ballot_bit_size;
uint8_t ballot_components;

View file

@ -30,6 +30,14 @@
* \file nir_opt_intrinsics.c
*/
/* Returns the subgroup size to use for lowering: options->subgroup_size when
 * it is non-zero, otherwise the product of options->ballot_components and
 * options->ballot_bit_size.
 */
static unsigned
get_max_subgroup_size(const nir_lower_subgroups_options *options)
{
   /* A non-zero subgroup_size is used as-is. */
   if (options->subgroup_size != 0) {
      return options->subgroup_size;
   }

   /* Otherwise fall back to the total number of bits in a ballot value. */
   return options->ballot_components * options->ballot_bit_size;
}
static nir_intrinsic_instr *
lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
unsigned int component)
@ -494,8 +502,9 @@ lower_boolean_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
case nir_intrinsic_rotate: {
nir_def *delta = nir_as_uniform(b, intrin->src[1].ssa);
uint32_t cluster_size = nir_intrinsic_cluster_size(intrin);
cluster_size = cluster_size ? cluster_size : options->subgroup_size;
cluster_size = MIN2(cluster_size, options->subgroup_size);
unsigned subgroup_size = get_max_subgroup_size(options);
cluster_size = cluster_size ? cluster_size : subgroup_size;
cluster_size = MIN2(cluster_size, subgroup_size);
if (cluster_size == 1) {
return intrin->src[0].ssa;
} else if (cluster_size == 2) {
@ -845,7 +854,7 @@ lower_scan_reduce(nir_builder *b, nir_intrinsic_instr *intrin,
const nir_lower_subgroups_options *options)
{
const nir_op red_op = nir_intrinsic_reduction_op(intrin);
unsigned subgroup_size = options->subgroup_size;
unsigned subgroup_size = get_max_subgroup_size(options);
/* Grab the cluster size */
unsigned cluster_size = subgroup_size;