mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 15:40:11 +01:00
nak: Implement nir_intrinsic_reduce with REDUX
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34912>
This commit is contained in:
parent
b165c07b38
commit
295373f29f
4 changed files with 43 additions and 5 deletions
|
|
@@ -3449,6 +3449,31 @@ impl<'a> ShaderFromNir<'a> {
|
||||||
});
|
});
|
||||||
self.set_dst(&intrin.def, dst.into());
|
self.set_dst(&intrin.def, dst.into());
|
||||||
}
|
}
|
||||||
|
nir_intrinsic_reduce => {
|
||||||
|
assert!(srcs[0].bit_size() == 32);
|
||||||
|
assert!(srcs[0].num_components() == 1);
|
||||||
|
let src = self.get_src(&srcs[0]);
|
||||||
|
let dst = b.alloc_ssa(RegFile::UGPR);
|
||||||
|
|
||||||
|
let op = match intrin.reduction_op() {
|
||||||
|
nir_op_iand => ReduxOp::And,
|
||||||
|
nir_op_ior => ReduxOp::Or,
|
||||||
|
nir_op_ixor => ReduxOp::Xor,
|
||||||
|
nir_op_iadd => ReduxOp::Sum,
|
||||||
|
nir_op_imin => ReduxOp::Min(IntCmpType::I32),
|
||||||
|
nir_op_imax => ReduxOp::Max(IntCmpType::I32),
|
||||||
|
nir_op_umin => ReduxOp::Min(IntCmpType::U32),
|
||||||
|
nir_op_umax => ReduxOp::Max(IntCmpType::U32),
|
||||||
|
_ => panic!("Unknown reduction op"),
|
||||||
|
};
|
||||||
|
|
||||||
|
b.push_op(OpRedux {
|
||||||
|
dst: dst.into(),
|
||||||
|
src: src,
|
||||||
|
op,
|
||||||
|
});
|
||||||
|
self.set_dst(&intrin.def, dst.into());
|
||||||
|
}
|
||||||
nir_intrinsic_shared_atomic => {
|
nir_intrinsic_shared_atomic => {
|
||||||
let bit_size = intrin.def.bit_size();
|
let bit_size = intrin.def.bit_size();
|
||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||||
|
|
|
||||||
|
|
@@ -998,7 +998,7 @@ nak_postprocess_nir(nir_shader *nir,
|
||||||
nir_divergence_analysis(nir);
|
nir_divergence_analysis(nir);
|
||||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||||
OPT(nir, nir_opt_tex_skip_helpers, true);
|
OPT(nir, nir_opt_tex_skip_helpers, true);
|
||||||
OPT(nir, nak_nir_lower_scan_reduce);
|
OPT(nir, nak_nir_lower_scan_reduce, nak);
|
||||||
|
|
||||||
nak_optimize_nir(nir, nak);
|
nak_optimize_nir(nir, nak);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -193,8 +193,10 @@ build_scan_reduce(nir_builder *b, nir_intrinsic_op op, nir_op red_op,
|
||||||
static bool
|
static bool
|
||||||
nak_nir_lower_scan_reduce_intrin(nir_builder *b,
|
nak_nir_lower_scan_reduce_intrin(nir_builder *b,
|
||||||
nir_intrinsic_instr *intrin,
|
nir_intrinsic_instr *intrin,
|
||||||
UNUSED void *_data)
|
void *_nak)
|
||||||
{
|
{
|
||||||
|
const struct nak_compiler *nak = (const struct nak_compiler *) _nak;
|
||||||
|
|
||||||
switch (intrin->intrinsic) {
|
switch (intrin->intrinsic) {
|
||||||
case nir_intrinsic_exclusive_scan:
|
case nir_intrinsic_exclusive_scan:
|
||||||
case nir_intrinsic_inclusive_scan:
|
case nir_intrinsic_inclusive_scan:
|
||||||
|
|
@@ -221,6 +223,17 @@ nak_nir_lower_scan_reduce_intrin(nir_builder *b,
|
||||||
/* Simple case where we're not actually doing any reducing at all. */
|
/* Simple case where we're not actually doing any reducing at all. */
|
||||||
assert(intrin->intrinsic == nir_intrinsic_reduce);
|
assert(intrin->intrinsic == nir_intrinsic_reduce);
|
||||||
data = intrin->src[0].ssa;
|
data = intrin->src[0].ssa;
|
||||||
|
} else if (intrin->intrinsic == nir_intrinsic_reduce &&
|
||||||
|
nak->sm >= 80 &&
|
||||||
|
red_op != nir_op_imul &&
|
||||||
|
nir_op_infos[red_op].output_type != nir_type_float &&
|
||||||
|
intrin->src[0].ssa->bit_size == 32 &&
|
||||||
|
cluster_size == 32 &&
|
||||||
|
!intrin->instr.block->divergent) {
|
||||||
|
/* TODO: We could probably also use REDUX for the non-uniform case if we
|
||||||
|
* were allowed to write uregs from non-uniform control flow.
|
||||||
|
*/
|
||||||
|
return false;
|
||||||
} else if (intrin->src[0].ssa->bit_size == 1) {
|
} else if (intrin->src[0].ssa->bit_size == 1) {
|
||||||
data = build_scan_bool(b, intrin->intrinsic, red_op,
|
data = build_scan_bool(b, intrin->intrinsic, red_op,
|
||||||
intrin->src[0].ssa, cluster_size);
|
intrin->src[0].ssa, cluster_size);
|
||||||
|
|
@@ -252,8 +265,8 @@ nak_nir_lower_scan_reduce_intrin(nir_builder *b,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
nak_nir_lower_scan_reduce(nir_shader *nir)
|
nak_nir_lower_scan_reduce(nir_shader *nir, const struct nak_compiler *nak)
|
||||||
{
|
{
|
||||||
return nir_shader_intrinsics_pass(nir, nak_nir_lower_scan_reduce_intrin,
|
return nir_shader_intrinsics_pass(nir, nak_nir_lower_scan_reduce_intrin,
|
||||||
nir_metadata_none, NULL);
|
nir_metadata_none, (void*) nak);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -198,7 +198,7 @@ static_assert(sizeof(struct nak_nir_tex_flags) == 4,
|
||||||
_u; \
|
_u; \
|
||||||
})
|
})
|
||||||
|
|
||||||
bool nak_nir_lower_scan_reduce(nir_shader *shader);
|
bool nak_nir_lower_scan_reduce(nir_shader *shader, const struct nak_compiler *nak);
|
||||||
bool nak_nir_lower_tex(nir_shader *nir, const struct nak_compiler *nak);
|
bool nak_nir_lower_tex(nir_shader *nir, const struct nak_compiler *nak);
|
||||||
bool nak_nir_lower_gs_intrinsics(nir_shader *shader);
|
bool nak_nir_lower_gs_intrinsics(nir_shader *shader);
|
||||||
bool nak_nir_lower_algebraic_late(nir_shader *nir, const struct nak_compiler *nak);
|
bool nak_nir_lower_algebraic_late(nir_shader *nir, const struct nak_compiler *nak);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue