nak: Implement 64-bit nir_op_fsign

There is NIR lowering for this, but this implementation is more efficient.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>
2025-12-24 15:20:10 +01:00 · 2023-12-18 17:50:32 -06:00 · 2023-12-18 17:50:32 -06:00 · 1f5623c557
commit 1f5623c557
parent d03cbac05a
2 changed files with 26 additions and 4 deletions
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -217,6 +217,18 @@ pub trait SSABuilder: Builder {
        dst
    }

+    fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
+        let dst = self.alloc_ssa(RegFile::Pred, 1);
+        self.push_op(OpDSetP {
+            dst: dst.into(),
+            set_op: PredSetOp::And,
+            cmp_op: cmp_op,
+            srcs: [x, y],
+            accum: SrcRef::True.into(),
+        });
+        dst
+    }
+
    fn iabs(&mut self, i: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpIAbs {
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -885,10 +885,20 @@ impl<'a> ShaderFromNir<'a> {
                }
            }
            nir_op_fsign => {
-                assert!(alu.def.bit_size() == 32);
-                let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
-                let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
-                b.fadd(gz.into(), Src::from(lz).fneg())
+                if alu.def.bit_size() == 64 {
+                    let lz = b.dsetp(FloatCmpOp::OrdLt, srcs[0], 0.into());
+                    let gz = b.dsetp(FloatCmpOp::OrdGt, srcs[0], 0.into());
+                    let hi = b.sel(lz.into(), 0xbff00000.into(), 0.into());
+                    let hi = b.sel(gz.into(), 0x3ff00000.into(), hi.into());
+                    let lo = b.copy(0.into());
+                    [lo[0], hi[0]].into()
+                } else if alu.def.bit_size() == 32 {
+                    let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
+                    let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
+                    b.fadd(gz.into(), Src::from(lz).fneg())
+                } else {
+                    panic!("Unsupported float type: f{}", alu.def.bit_size());
+                }
            }
            nir_op_fsin => {
                let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);