nak: switch to derivative intrinsics

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30573>
2026-05-09 02:28:10 +02:00 · 2024-07-24 11:36:35 -04:00 · 2024-07-24 11:36:35 -04:00 · 8c501a121f
commit 8c501a121f
parent 185379ba81
2 changed files with 72 additions and 66 deletions
--- a/src/nouveau/compiler/nak/api.rs
+++ b/src/nouveau/compiler/nak/api.rs
@ -163,6 +163,8 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
    op.discard_is_demote = true;

    op.max_unroll_iterations = 32;
+    op.has_ddx_intrinsics = true;
+    op.scalarize_ddx = true;

    op
 }
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@ -1660,72 +1660,6 @@ impl<'a> ShaderFromNir<'a> {
                    b.shr(srcs[0], srcs[1], false)
                }
            }
-            nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
-                // TODO: Real coarse derivatives
-
-                assert!(alu.def.bit_size() == 32);
-                let ftype = FloatType::F32;
-                let scratch = b.alloc_ssa(RegFile::GPR, 1);
-
-                b.push_op(OpShfl {
-                    dst: scratch[0].into(),
-                    in_bounds: Dst::None,
-                    src: srcs[0],
-                    lane: 1_u32.into(),
-                    c: (0x3_u32 | 0x1c_u32 << 8).into(),
-                    op: ShflOp::Bfly,
-                });
-
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-
-                b.push_op(OpFSwzAdd {
-                    dst: dst[0].into(),
-                    srcs: [scratch[0].into(), srcs[0]],
-                    ops: [
-                        FSwzAddOp::SubLeft,
-                        FSwzAddOp::SubRight,
-                        FSwzAddOp::SubLeft,
-                        FSwzAddOp::SubRight,
-                    ],
-                    rnd_mode: self.float_ctl[ftype].rnd_mode,
-                    ftz: self.float_ctl[ftype].ftz,
-                });
-
-                dst
-            }
-            nir_op_fddy | nir_op_fddy_coarse | nir_op_fddy_fine => {
-                // TODO: Real coarse derivatives
-
-                assert!(alu.def.bit_size() == 32);
-                let ftype = FloatType::F32;
-                let scratch = b.alloc_ssa(RegFile::GPR, 1);
-
-                b.push_op(OpShfl {
-                    dst: scratch[0].into(),
-                    in_bounds: Dst::None,
-                    src: srcs[0],
-                    lane: 2_u32.into(),
-                    c: (0x3_u32 | 0x1c_u32 << 8).into(),
-                    op: ShflOp::Bfly,
-                });
-
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-
-                b.push_op(OpFSwzAdd {
-                    dst: dst[0].into(),
-                    srcs: [scratch[0].into(), srcs[0]],
-                    ops: [
-                        FSwzAddOp::SubLeft,
-                        FSwzAddOp::SubLeft,
-                        FSwzAddOp::SubRight,
-                        FSwzAddOp::SubRight,
-                    ],
-                    rnd_mode: self.float_ctl[ftype].rnd_mode,
-                    ftz: self.float_ctl[ftype].ftz,
-                });
-
-                dst
-            }
            _ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
        };
        self.set_dst(&alu.def, dst);
@ -2132,6 +2066,76 @@ impl<'a> ShaderFromNir<'a> {
                }
                self.set_ssa(&intrin.def, dst);
            }
+            nir_intrinsic_ddx
+            | nir_intrinsic_ddx_coarse
+            | nir_intrinsic_ddx_fine => {
+                // TODO: Real coarse derivatives
+
+                assert!(intrin.def.bit_size() == 32);
+                let ftype = FloatType::F32;
+                let scratch = b.alloc_ssa(RegFile::GPR, 1);
+
+                b.push_op(OpShfl {
+                    dst: scratch[0].into(),
+                    in_bounds: Dst::None,
+                    src: self.get_src(&srcs[0]),
+                    lane: 1_u32.into(),
+                    c: (0x3_u32 | 0x1c_u32 << 8).into(),
+                    op: ShflOp::Bfly,
+                });
+
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+
+                b.push_op(OpFSwzAdd {
+                    dst: dst[0].into(),
+                    srcs: [scratch[0].into(), self.get_src(&srcs[0])],
+                    ops: [
+                        FSwzAddOp::SubLeft,
+                        FSwzAddOp::SubRight,
+                        FSwzAddOp::SubLeft,
+                        FSwzAddOp::SubRight,
+                    ],
+                    rnd_mode: self.float_ctl[ftype].rnd_mode,
+                    ftz: self.float_ctl[ftype].ftz,
+                });
+
+                self.set_dst(&intrin.def, dst);
+            }
+            nir_intrinsic_ddy
+            | nir_intrinsic_ddy_coarse
+            | nir_intrinsic_ddy_fine => {
+                // TODO: Real coarse derivatives
+
+                assert!(intrin.def.bit_size() == 32);
+                let ftype = FloatType::F32;
+                let scratch = b.alloc_ssa(RegFile::GPR, 1);
+
+                b.push_op(OpShfl {
+                    dst: scratch[0].into(),
+                    in_bounds: Dst::None,
+                    src: self.get_src(&srcs[0]),
+                    lane: 2_u32.into(),
+                    c: (0x3_u32 | 0x1c_u32 << 8).into(),
+                    op: ShflOp::Bfly,
+                });
+
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+
+                b.push_op(OpFSwzAdd {
+                    dst: dst[0].into(),
+                    srcs: [scratch[0].into(), self.get_src(&srcs[0])],
+                    ops: [
+                        FSwzAddOp::SubLeft,
+                        FSwzAddOp::SubLeft,
+                        FSwzAddOp::SubRight,
+                        FSwzAddOp::SubRight,
+                    ],
+                    rnd_mode: self.float_ctl[ftype].rnd_mode,
+                    ftz: self.float_ctl[ftype].ftz,
+                });
+
+                self.set_dst(&intrin.def, dst);
+            }
            nir_intrinsic_ballot => {
                assert!(srcs[0].bit_size() == 1);
                let src = self.get_src(&srcs[0]);