mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
agx: flesh out subgroup lowering
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29179>
This commit is contained in:
parent
659db5049c
commit
54ec9512ef
1 changed files with 65 additions and 0 deletions
|
|
@ -8,6 +8,8 @@
|
|||
#include "agx_nir.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
#include "nir_intrinsics.h"
|
||||
#include "nir_intrinsics_indices.h"
|
||||
#include "nir_opcodes.h"
|
||||
|
||||
/* XXX: cribbed from nak, move to common */
|
||||
static nir_def *
|
||||
|
|
@ -113,19 +115,82 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_inclusive_scan: {
|
||||
/* If we got here, we support the corresponding exclusive scan in
|
||||
* hardware, so just handle the last element.
|
||||
*/
|
||||
nir_op red_op = nir_intrinsic_reduction_op(intr);
|
||||
nir_def *data = intr->src[0].ssa;
|
||||
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
intr->intrinsic = nir_intrinsic_exclusive_scan;
|
||||
nir_def *accum = nir_build_alu2(b, red_op, data, &intr->def);
|
||||
nir_def_rewrite_uses_after(&intr->def, accum, accum->parent_instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_subgroup_filter(const nir_instr *instr, UNUSED const void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
/* Use default behaviour for everything but scans */
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_exclusive_scan &&
|
||||
intr->intrinsic != nir_intrinsic_inclusive_scan &&
|
||||
intr->intrinsic != nir_intrinsic_reduce)
|
||||
return true;
|
||||
|
||||
if (intr->def.num_components > 1 || intr->def.bit_size == 1)
|
||||
return true;
|
||||
|
||||
/* Hardware supports quad ops but no other support clustered reductions. */
|
||||
if (nir_intrinsic_has_cluster_size(intr)) {
|
||||
unsigned cluster = nir_intrinsic_cluster_size(intr);
|
||||
if (cluster && cluster != 4 && cluster < 32)
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (nir_intrinsic_reduction_op(intr)) {
|
||||
case nir_op_imul:
|
||||
/* no imul hardware scan, always lower it */
|
||||
return true;
|
||||
|
||||
case nir_op_iadd:
|
||||
case nir_op_iand:
|
||||
case nir_op_ixor:
|
||||
case nir_op_ior:
|
||||
/* these have dedicated 64-bit lowering paths that use the 32-bit hardware
|
||||
* instructions so are likely better than the full lowering.
|
||||
*/
|
||||
return false;
|
||||
|
||||
default:
|
||||
/* otherwise, lower 64-bit, since the hw ops are at most 32-bit. */
|
||||
return intr->def.bit_size == 64;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_subgroups(nir_shader *s)
|
||||
{
|
||||
/* First, do as much common lowering as we can */
|
||||
nir_lower_subgroups_options opts = {
|
||||
.filter = lower_subgroup_filter,
|
||||
.lower_read_first_invocation = true,
|
||||
.lower_inverse_ballot = true,
|
||||
.lower_to_scalar = true,
|
||||
.lower_relative_shuffle = true,
|
||||
.lower_rotate_to_shuffle = true,
|
||||
.lower_subgroup_masks = true,
|
||||
.lower_elect = true,
|
||||
.lower_reduce = true,
|
||||
.ballot_components = 1,
|
||||
.ballot_bit_size = 32,
|
||||
.subgroup_size = 32,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue