diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index acf0b8e6398..61e94ed1f91 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -3449,6 +3449,31 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst.into());
             }
+            nir_intrinsic_reduce => {
+                assert!(srcs[0].bit_size() == 32);
+                assert!(srcs[0].num_components() == 1);
+                let src = self.get_src(&srcs[0]);
+                let dst = b.alloc_ssa(RegFile::UGPR);
+
+                let op = match intrin.reduction_op() {
+                    nir_op_iand => ReduxOp::And,
+                    nir_op_ior => ReduxOp::Or,
+                    nir_op_ixor => ReduxOp::Xor,
+                    nir_op_iadd => ReduxOp::Sum,
+                    nir_op_imin => ReduxOp::Min(IntCmpType::I32),
+                    nir_op_imax => ReduxOp::Max(IntCmpType::I32),
+                    nir_op_umin => ReduxOp::Min(IntCmpType::U32),
+                    nir_op_umax => ReduxOp::Max(IntCmpType::U32),
+                    _ => panic!("Unknown reduction op"),
+                };
+
+                b.push_op(OpRedux {
+                    dst: dst.into(),
+                    src,
+                    op,
+                });
+                self.set_dst(&intrin.def, dst.into());
+            }
             nir_intrinsic_shared_atomic => {
                 let bit_size = intrin.def.bit_size();
                 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index b1f3e1167c8..5d6f43d2669 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -998,7 +998,7 @@ nak_postprocess_nir(nir_shader *nir,
    nir_divergence_analysis(nir);
    if (nir->info.stage == MESA_SHADER_FRAGMENT)
       OPT(nir, nir_opt_tex_skip_helpers, true);
-   OPT(nir, nak_nir_lower_scan_reduce);
+   OPT(nir, nak_nir_lower_scan_reduce, nak);
 
    nak_optimize_nir(nir, nak);
 
diff --git a/src/nouveau/compiler/nak_nir_lower_scan_reduce.c b/src/nouveau/compiler/nak_nir_lower_scan_reduce.c
index 26544ef117c..ddd0a9d7b81 100644
--- a/src/nouveau/compiler/nak_nir_lower_scan_reduce.c
+++ b/src/nouveau/compiler/nak_nir_lower_scan_reduce.c
@@ -193,8 +193,10 @@ build_scan_reduce(nir_builder *b, nir_intrinsic_op op, nir_op red_op,
 static bool
 nak_nir_lower_scan_reduce_intrin(nir_builder *b,
                                  nir_intrinsic_instr *intrin,
-                                 UNUSED void *_data)
+                                 void *_nak)
 {
+   const struct nak_compiler *nak = (const struct nak_compiler *)_nak;
+
    switch (intrin->intrinsic) {
    case nir_intrinsic_exclusive_scan:
    case nir_intrinsic_inclusive_scan:
@@ -221,6 +223,17 @@ nak_nir_lower_scan_reduce_intrin(nir_builder *b,
       /* Simple case where we're not actually doing any reducing at all. */
       assert(intrin->intrinsic == nir_intrinsic_reduce);
       data = intrin->src[0].ssa;
+   } else if (intrin->intrinsic == nir_intrinsic_reduce &&
+              nak->sm >= 80 &&
+              red_op != nir_op_imul &&
+              nir_op_infos[red_op].output_type != nir_type_float &&
+              intrin->src[0].ssa->bit_size == 32 &&
+              cluster_size == 32 &&
+              !intrin->instr.block->divergent) {
+      /* TODO: We could probably also use REDUX for the non-uniform case if we
+       * were allowed to write uregs from non-uniform control flow.
+       */
+      return false;
    } else if (intrin->src[0].ssa->bit_size == 1) {
       data = build_scan_bool(b, intrin->intrinsic, red_op,
                              intrin->src[0].ssa, cluster_size);
@@ -252,8 +265,8 @@ nak_nir_lower_scan_reduce_intrin(nir_builder *b,
 }
 
 bool
-nak_nir_lower_scan_reduce(nir_shader *nir)
+nak_nir_lower_scan_reduce(nir_shader *nir, const struct nak_compiler *nak)
 {
    return nir_shader_intrinsics_pass(nir, nak_nir_lower_scan_reduce_intrin,
-                                     nir_metadata_none, NULL);
+                                     nir_metadata_none, (void *)nak);
 }
diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h
index 63c080d262d..d1cf3e3f46c 100644
--- a/src/nouveau/compiler/nak_private.h
+++ b/src/nouveau/compiler/nak_private.h
@@ -198,7 +198,7 @@ static_assert(sizeof(struct nak_nir_tex_flags) == 4,
       _u; \
    })
 
-bool nak_nir_lower_scan_reduce(nir_shader *shader);
+bool nak_nir_lower_scan_reduce(nir_shader *shader, const struct nak_compiler *nak);
 bool nak_nir_lower_tex(nir_shader *nir, const struct nak_compiler *nak);
 bool nak_nir_lower_gs_intrinsics(nir_shader *shader);
 bool nak_nir_lower_algebraic_late(nir_shader *nir, const struct nak_compiler *nak);
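For context: on SM80+, REDUX performs a whole-warp 32-bit integer reduction in a single instruction and writes the result to a uniform register, which is why the new OpRedux path above is gated on cluster_size == 32, a non-float, non-imul 32-bit op, and uniform control flow. As a rough illustration only (a hypothetical CPU-side model, not code from this patch), the shuffle-style butterfly that the lowering pass otherwise builds looks something like this:

// Hypothetical CPU-side model of a 5-step butterfly reduction over a
// 32-lane warp (here with a generic binary op). Each step combines a
// lane with its partner at lane ^ stride; after log2(32) = 5 steps,
// every lane holds the full reduction. REDUX collapses all of this
// into one instruction whose result lands directly in a UGPR.
fn butterfly_reduce_u32(mut lanes: [u32; 32], op: impl Fn(u32, u32) -> u32) -> u32 {
    let mut stride = 1;
    while stride < 32 {
        let prev = lanes;
        for lane in 0..32 {
            // Models a shuffle-xor: read the partner lane's value.
            lanes[lane] = op(prev[lane], prev[lane ^ stride]);
        }
        stride *= 2;
    }
    // All 32 lanes now agree, so any lane's value is the answer; this
    // invariant is what lets a single uniform result stand in for the
    // per-lane one.
    lanes[0]
}

fn main() {
    let lanes: [u32; 32] = std::array::from_fn(|i| (i as u32) + 1);
    // 1 + 2 + ... + 32 == 528
    assert_eq!(butterfly_reduce_u32(lanes, u32::wrapping_add), 528);
}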