diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index e798e6e12b4..a4cb5069e80 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -5205,6 +5205,70 @@ bi_lower_load_output(nir_builder *b, nir_intrinsic_instr *intr, return true; } +static bool +bi_lower_subgroups(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + unsigned int gpu_id = *(unsigned int *)data; + unsigned int arch = pan_arch(gpu_id); + + b->cursor = nir_before_instr(&intr->instr); + + nir_def *val = NULL; + switch (intr->intrinsic) { + case nir_intrinsic_vote_any: + val = nir_ine_imm(b, nir_ballot(b, 1, 32, intr->src[0].ssa), 0); + break; + + case nir_intrinsic_vote_all: + val = nir_ieq_imm(b, nir_ballot(b, 1, 32, nir_inot(b, intr->src[0].ssa)), 0); + break; + + case nir_intrinsic_load_subgroup_id: { + nir_def *local_id = nir_load_local_invocation_id(b); + nir_def *local_size = nir_load_workgroup_size(b); + /* local_id.x + local_size.x * (local_id.y + local_size.y * local_id.z) */ + nir_def *flat_local_id = + nir_iadd(b, + nir_channel(b, local_id, 0), + nir_imul(b, + nir_channel(b, local_size, 0), + nir_iadd(b, + nir_channel(b, local_id, 1), + nir_imul(b, + nir_channel(b, local_size, 1), + nir_channel(b, local_id, 2))))); + /* + * nir_udiv_imm with a power of two divisor, which pan_subgroup_size is, + * will construct a right shift instead of an udiv. + */ + val = nir_udiv_imm(b, flat_local_id, pan_subgroup_size(arch)); + break; + } + + case nir_intrinsic_load_subgroup_size: + val = nir_imm_int(b, pan_subgroup_size(arch)); + break; + + case nir_intrinsic_load_num_subgroups: { + uint32_t subgroup_size = pan_subgroup_size(arch); + assert(!b->shader->info.workgroup_size_variable); + uint32_t workgroup_size = + b->shader->info.workgroup_size[0] * + b->shader->info.workgroup_size[1] * + b->shader->info.workgroup_size[2]; + uint32_t num_subgroups = DIV_ROUND_UP(workgroup_size, subgroup_size); + val = nir_imm_int(b, num_subgroups); + break; + } + + default: + return false; + } + + nir_def_rewrite_uses(&intr->def, val); + return true; +} + bool bifrost_nir_lower_load_output(nir_shader *nir) { @@ -5330,6 +5394,9 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) if (pan_arch(gpu_id) < 9) NIR_PASS(_, nir, pan_nir_lower_image_ms); + NIR_PASS(_, nir, nir_shader_intrinsics_pass, bi_lower_subgroups, + nir_metadata_control_flow, &gpu_id); + NIR_PASS(_, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL); NIR_PASS(_, nir, nir_lower_load_const_to_scalar); NIR_PASS(_, nir, nir_lower_phis_to_scalar, true);