pan/bi: lower some subgroup intrinsics

Lower vote_any, vote_all, load_subgroup_id, load_subgroup_size and
load_num_workgroups.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32710>
This commit is contained in:
Caterina Shablia 2025-01-15 13:59:25 +00:00 committed by Marge Bot
parent 1b59b9edee
commit d5c5528e06

View file

@ -5205,6 +5205,70 @@ bi_lower_load_output(nir_builder *b, nir_intrinsic_instr *intr,
return true;
}
static bool
bi_lower_subgroups(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
unsigned int gpu_id = *(unsigned int *)data;
unsigned int arch = pan_arch(gpu_id);
b->cursor = nir_before_instr(&intr->instr);
nir_def *val = NULL;
switch (intr->intrinsic) {
case nir_intrinsic_vote_any:
val = nir_ine_imm(b, nir_ballot(b, 1, 32, intr->src[0].ssa), 0);
break;
case nir_intrinsic_vote_all:
val = nir_ieq_imm(b, nir_ballot(b, 1, 32, nir_inot(b, intr->src[0].ssa)), 0);
break;
case nir_intrinsic_load_subgroup_id: {
nir_def *local_id = nir_load_local_invocation_id(b);
nir_def *local_size = nir_load_workgroup_size(b);
/* local_id.x + local_size.x * (local_id.y + local_size.y * local_id.z) */
nir_def *flat_local_id =
nir_iadd(b,
nir_channel(b, local_id, 0),
nir_imul(b,
nir_channel(b, local_size, 0),
nir_iadd(b,
nir_channel(b, local_id, 1),
nir_imul(b,
nir_channel(b, local_size, 1),
nir_channel(b, local_id, 2)))));
/*
* nir_udiv_imm with a power of two divisor, which pan_subgroup_size is,
* will construct a right shift instead of an udiv.
*/
val = nir_udiv_imm(b, flat_local_id, pan_subgroup_size(arch));
break;
}
case nir_intrinsic_load_subgroup_size:
val = nir_imm_int(b, pan_subgroup_size(arch));
break;
case nir_intrinsic_load_num_subgroups: {
uint32_t subgroup_size = pan_subgroup_size(arch);
assert(!b->shader->info.workgroup_size_variable);
uint32_t workgroup_size =
b->shader->info.workgroup_size[0] *
b->shader->info.workgroup_size[1] *
b->shader->info.workgroup_size[2];
uint32_t num_subgroups = DIV_ROUND_UP(workgroup_size, subgroup_size);
val = nir_imm_int(b, num_subgroups);
break;
}
default:
return false;
}
nir_def_rewrite_uses(&intr->def, val);
return true;
}
bool
bifrost_nir_lower_load_output(nir_shader *nir)
{
@ -5330,6 +5394,9 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
if (pan_arch(gpu_id) < 9)
NIR_PASS(_, nir, pan_nir_lower_image_ms);
NIR_PASS(_, nir, nir_shader_intrinsics_pass, bi_lower_subgroups,
nir_metadata_control_flow, &gpu_id);
NIR_PASS(_, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
NIR_PASS(_, nir, nir_lower_phis_to_scalar, true);