broadcom/compiler: lower nir_intrinsic_load_num_subgroups

The number of subgroups is the local workgroup size divided by the
dispatch width.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11620>
This commit is contained in:
Iago Toral Quiroga 2021-06-22 12:00:55 +02:00
parent 30dec8b414
commit a9ad04f17d
2 changed files with 73 additions and 0 deletions

View file

@ -3241,6 +3241,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_num_subgroups:
unreachable("Should have been lowered");
break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);

View file

@ -25,6 +25,7 @@
#include "v3d_compiler.h"
#include "util/u_prim.h"
#include "compiler/nir/nir_schedule.h"
#include "compiler/nir/nir_builder.h"
int
vir_get_nsrc(struct qinst *inst)
@ -1350,6 +1351,72 @@ v3d_nir_sort_constant_ubo_loads(nir_shader *s, struct v3d_compile *c)
return c->sorted_any_ubo_loads;
}
static void
lower_load_num_subgroups(struct v3d_compile *c,
nir_builder *b,
nir_intrinsic_instr *intr)
{
assert(c->s->info.stage == MESA_SHADER_COMPUTE);
assert(intr->intrinsic == nir_intrinsic_load_num_subgroups);
b->cursor = nir_after_instr(&intr->instr);
uint32_t num_subgroups =
DIV_ROUND_UP(c->s->info.workgroup_size[0] *
c->s->info.workgroup_size[1] *
c->s->info.workgroup_size[2], V3D_CHANNELS);
nir_ssa_def *result = nir_imm_int(b, num_subgroups);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
nir_instr_remove(&intr->instr);
}
static bool
lower_subgroup_intrinsics(struct v3d_compile *c,
nir_block *block, nir_builder *b)
{
bool progress = false;
nir_foreach_instr_safe(inst, block) {
if (inst->type != nir_instr_type_intrinsic)
continue;;
nir_intrinsic_instr *intr =
nir_instr_as_intrinsic(inst);
if (!intr)
continue;
switch (intr->intrinsic) {
case nir_intrinsic_load_num_subgroups: {
lower_load_num_subgroups(c, b, intr);
progress = true;
break;
}
default:
break;
}
}
return progress;
}
static bool
v3d_nir_lower_subgroup_intrinsics(nir_shader *s, struct v3d_compile *c)
{
bool progress = false;
nir_foreach_function(function, s) {
if (function->impl) {
nir_builder b;
nir_builder_init(&b, function->impl);
nir_foreach_block(block, function->impl)
progress |= lower_subgroup_intrinsics(c, block, &b);
nir_metadata_preserve(function->impl,
nir_metadata_block_index |
nir_metadata_dominance);
}
}
return progress;
}
static void
v3d_attempt_compile(struct v3d_compile *c)
{
@ -1422,6 +1489,8 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
NIR_PASS_V(c->s, v3d_nir_lower_subgroup_intrinsics, c);
v3d_optimize_nir(c, c->s);
/* Do late algebraic optimization to turn add(a, neg(b)) back into