nak: Implement nir_intrinsic_reduce with REDUX
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34912>
This commit is contained in:
Mel Henning 2025-05-09 17:23:18 -04:00 committed by Marge Bot
parent b165c07b38
commit 295373f29f
4 changed files with 43 additions and 5 deletions

View file

@ -3449,6 +3449,31 @@ impl<'a> ShaderFromNir<'a> {
}); });
self.set_dst(&intrin.def, dst.into()); self.set_dst(&intrin.def, dst.into());
} }
// Translate a NIR `reduce` intrinsic into a single OpRedux instruction.
nir_intrinsic_reduce => {
    // Only 32-bit, single-component sources are handled by this arm.
    assert!(srcs[0].bit_size() == 32);
    assert!(srcs[0].num_components() == 1);
    let src = self.get_src(&srcs[0]);
    // The result is allocated in the UGPR register file.
    let dst = b.alloc_ssa(RegFile::UGPR);
    // Map the NIR reduction opcode onto the matching ReduxOp. Signed and
    // unsigned min/max share a variant and differ only in the IntCmpType
    // they carry.
    let op = match intrin.reduction_op() {
        nir_op_iand => ReduxOp::And,
        nir_op_ior => ReduxOp::Or,
        nir_op_ixor => ReduxOp::Xor,
        nir_op_iadd => ReduxOp::Sum,
        nir_op_imin => ReduxOp::Min(IntCmpType::I32),
        nir_op_imax => ReduxOp::Max(IntCmpType::I32),
        nir_op_umin => ReduxOp::Min(IntCmpType::U32),
        nir_op_umax => ReduxOp::Max(IntCmpType::U32),
        // Every other reduction opcode is unsupported by this arm.
        _ => panic!("Unknown reduction op"),
    };
    b.push_op(OpRedux {
        dst: dst.into(),
        src,
        op,
    });
    self.set_dst(&intrin.def, dst.into());
}
nir_intrinsic_shared_atomic => { nir_intrinsic_shared_atomic => {
let bit_size = intrin.def.bit_size(); let bit_size = intrin.def.bit_size();
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);

View file

@ -998,7 +998,7 @@ nak_postprocess_nir(nir_shader *nir,
nir_divergence_analysis(nir); nir_divergence_analysis(nir);
if (nir->info.stage == MESA_SHADER_FRAGMENT) if (nir->info.stage == MESA_SHADER_FRAGMENT)
OPT(nir, nir_opt_tex_skip_helpers, true); OPT(nir, nir_opt_tex_skip_helpers, true);
OPT(nir, nak_nir_lower_scan_reduce); OPT(nir, nak_nir_lower_scan_reduce, nak);
nak_optimize_nir(nir, nak); nak_optimize_nir(nir, nak);

View file

@ -193,8 +193,10 @@ build_scan_reduce(nir_builder *b, nir_intrinsic_op op, nir_op red_op,
static bool static bool
nak_nir_lower_scan_reduce_intrin(nir_builder *b, nak_nir_lower_scan_reduce_intrin(nir_builder *b,
nir_intrinsic_instr *intrin, nir_intrinsic_instr *intrin,
UNUSED void *_data) void *_nak)
{ {
const struct nak_compiler *nak = (const struct nak_compiler *) _nak;
switch (intrin->intrinsic) { switch (intrin->intrinsic) {
case nir_intrinsic_exclusive_scan: case nir_intrinsic_exclusive_scan:
case nir_intrinsic_inclusive_scan: case nir_intrinsic_inclusive_scan:
@ -221,6 +223,17 @@ nak_nir_lower_scan_reduce_intrin(nir_builder *b,
/* Simple case where we're not actually doing any reducing at all. */ /* Simple case where we're not actually doing any reducing at all. */
assert(intrin->intrinsic == nir_intrinsic_reduce); assert(intrin->intrinsic == nir_intrinsic_reduce);
data = intrin->src[0].ssa; data = intrin->src[0].ssa;
} else if (intrin->intrinsic == nir_intrinsic_reduce &&
nak->sm >= 80 &&
red_op != nir_op_imul &&
nir_op_infos[red_op].output_type != nir_type_float &&
intrin->src[0].ssa->bit_size == 32 &&
cluster_size == 32 &&
!intrin->instr.block->divergent) {
/* TODO: We could probably also use REDUX for the non-uniform case if we
* were allowed to write uregs from non-uniform control flow.
*/
return false;
} else if (intrin->src[0].ssa->bit_size == 1) { } else if (intrin->src[0].ssa->bit_size == 1) {
data = build_scan_bool(b, intrin->intrinsic, red_op, data = build_scan_bool(b, intrin->intrinsic, red_op,
intrin->src[0].ssa, cluster_size); intrin->src[0].ssa, cluster_size);
@ -252,8 +265,8 @@ nak_nir_lower_scan_reduce_intrin(nir_builder *b,
} }
bool bool
nak_nir_lower_scan_reduce(nir_shader *nir) nak_nir_lower_scan_reduce(nir_shader *nir, const struct nak_compiler *nak)
{ {
return nir_shader_intrinsics_pass(nir, nak_nir_lower_scan_reduce_intrin, return nir_shader_intrinsics_pass(nir, nak_nir_lower_scan_reduce_intrin,
nir_metadata_none, NULL); nir_metadata_none, (void*) nak);
} }

View file

@ -198,7 +198,7 @@ static_assert(sizeof(struct nak_nir_tex_flags) == 4,
_u; \ _u; \
}) })
bool nak_nir_lower_scan_reduce(nir_shader *shader); bool nak_nir_lower_scan_reduce(nir_shader *shader, const struct nak_compiler *nak);
bool nak_nir_lower_tex(nir_shader *nir, const struct nak_compiler *nak); bool nak_nir_lower_tex(nir_shader *nir, const struct nak_compiler *nak);
bool nak_nir_lower_gs_intrinsics(nir_shader *shader); bool nak_nir_lower_gs_intrinsics(nir_shader *shader);
bool nak_nir_lower_algebraic_late(nir_shader *nir, const struct nak_compiler *nak); bool nak_nir_lower_algebraic_late(nir_shader *nir, const struct nak_compiler *nak);