nak: Implement 64-bit comparisons

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26246>
2025-12-24 19:40:10 +01:00 · 2023-12-04 13:03:35 -06:00 · 2023-12-04 13:03:35 -06:00 · d2cec80768
commit d2cec80768
parent 8747a25d65
6 changed files with 97 additions and 22 deletions
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@ -129,8 +129,10 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
    op.lower_usub_sat = true; // TODO
    op.lower_iadd_sat = true; // TODO
    op.use_interpolated_input_intrinsics = true;
-    op.lower_int64_options =
-        !(nir_lower_iadd64 | nir_lower_ineg64 | nir_lower_shift64);
+    op.lower_int64_options = !(nir_lower_icmp64
+        | nir_lower_iadd64
+        | nir_lower_ineg64
+        | nir_lower_shift64);
    op.lower_ldexp = true;
    op.lower_fmod = true;
    op.lower_ffract = true;
--- a/src/nouveau/compiler/nak_builder.rs
+++ b/src/nouveau/compiler/nak_builder.rs
@ -201,12 +201,61 @@ pub trait SSABuilder: Builder {
            set_op: PredSetOp::And,
            cmp_op: cmp_op,
            cmp_type: cmp_type,
+            ex: false,
            srcs: [x, y],
-            accum: SrcRef::True.into(),
+            accum: true.into(),
+            low_cmp: true.into(),
        });
        dst
    }

+    /// Emits a 64-bit integer comparison of `x` and `y` as a pair of
+    /// `OpISetP` instructions and returns the resulting predicate.
+    ///
+    /// Component 0 of each source is compared first via `isetp()` (always
+    /// as `U32`), then component 1 is compared by a second `OpISetP` that
+    /// consumes the first result.
+    ///
+    /// * `cmp_type` - signedness applied to the component-1 comparison for
+    ///   the ordered operators; it is not used for `Eq`/`Ne`, which always
+    ///   compare as `U32`.
+    /// * `cmp_op` - the comparison operator, used for both halves.
+    /// * `x`, `y` - the operands; both must be SSA values with at least two
+    ///   components.
+    ///
+    /// Returns a freshly allocated one-component `RegFile::Pred` SSA value.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `x.as_ssa()` or `y.as_ssa()` returns `None`.
+    fn isetp64(
+        &mut self,
+        cmp_type: IntCmpType,
+        cmp_op: IntCmpOp,
+        x: Src,
+        y: Src,
+    ) -> SSARef {
+        let x = x.as_ssa().unwrap();
+        let y = y.as_ssa().unwrap();
+
+        // Low bits are always an unsigned comparison
+        let low = self.isetp(IntCmpType::U32, cmp_op, x[0].into(), y[0].into());
+
+        let dst = self.alloc_ssa(RegFile::Pred, 1);
+        match cmp_op {
+            // Equality: compare component 1 as well and merge the component-0
+            // result in through `accum`.  Both halves must match for Eq
+            // (And); either half differing is enough for Ne (Or).
+            IntCmpOp::Eq | IntCmpOp::Ne => {
+                self.push_op(OpISetP {
+                    dst: dst.into(),
+                    set_op: match cmp_op {
+                        IntCmpOp::Eq => PredSetOp::And,
+                        IntCmpOp::Ne => PredSetOp::Or,
+                        // Unreachable: the enclosing arm only admits Eq/Ne.
+                        _ => panic!("Not an integer equality"),
+                    },
+                    cmp_op: cmp_op,
+                    cmp_type: IntCmpType::U32,
+                    ex: false,
+                    srcs: [x[1].into(), y[1].into()],
+                    accum: low.into(),
+                    low_cmp: true.into(),
+                });
+            }
+            // Ordered comparisons: compare component 1 with the caller's
+            // signedness, set `ex`, and hand the component-0 result over as
+            // `low_cmp`; `accum` is left as the constant true.
+            // NOTE(review): `ex` presumably selects the hardware's extended
+            // compare that folds `low_cmp` into the result -- confirm
+            // against the ISA documentation.
+            IntCmpOp::Ge | IntCmpOp::Gt | IntCmpOp::Le | IntCmpOp::Lt => {
+                self.push_op(OpISetP {
+                    dst: dst.into(),
+                    set_op: PredSetOp::And,
+                    cmp_op: cmp_op,
+                    cmp_type: cmp_type,
+                    ex: true,
+                    srcs: [x[1].into(), y[1].into()],
+                    accum: true.into(),
+                    low_cmp: low.into(),
+                });
+            }
+        }
+        dst
+    }
+
    fn lop2(&mut self, op: LogicOp, x: Src, y: Src) -> SSARef {
        let dst = if x.is_predicate() {
            self.alloc_ssa(RegFile::Pred, 1)
--- a/src/nouveau/compiler/nak_encode_sm70.rs
+++ b/src/nouveau/compiler/nak_encode_sm70.rs
@ -704,7 +704,8 @@ impl SM70Instr {
            ALUSrc::None,
        );

-        self.set_pred_src(68..71, 71, SrcRef::True.into());
+        self.set_pred_src(68..71, 71, op.low_cmp);
+        self.set_bit(72, op.ex);

        self.set_field(
            73..74,
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@ -820,7 +820,10 @@ impl<'a> ShaderFromNir<'a> {
                if alu.get_src(0).bit_size() == 1 {
                    let lop = LogicOp::new_lut(&|x, y, _| !(x ^ y));
                    b.lop2(lop, srcs[0], srcs[1])
+                } else if alu.get_src(0).bit_size() == 64 {
+                    b.isetp64(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
                } else {
+                    assert!(alu.get_src(0).bit_size() == 32);
                    b.isetp(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
                }
            }
@ -834,18 +837,21 @@ impl<'a> ShaderFromNir<'a> {
                });
                dst
            }
-            nir_op_ige => {
-                b.isetp(IntCmpType::I32, IntCmpOp::Ge, srcs[0], srcs[1])
-            }
-            nir_op_ilt => {
-                b.isetp(IntCmpType::I32, IntCmpOp::Lt, srcs[0], srcs[1])
-            }
-            nir_op_ine => {
-                if alu.get_src(0).bit_size() == 1 {
-                    let lop = LogicOp::new_lut(&|x, y, _| (x ^ y));
-                    b.lop2(lop, srcs[0], srcs[1])
+            nir_op_ige | nir_op_ilt | nir_op_uge | nir_op_ult => {
+                let x = *srcs[0].as_ssa().unwrap();
+                let y = *srcs[1].as_ssa().unwrap();
+                let (cmp_type, cmp_op) = match alu.op {
+                    nir_op_ige => (IntCmpType::I32, IntCmpOp::Ge),
+                    nir_op_ilt => (IntCmpType::I32, IntCmpOp::Lt),
+                    nir_op_uge => (IntCmpType::U32, IntCmpOp::Ge),
+                    nir_op_ult => (IntCmpType::U32, IntCmpOp::Lt),
+                    _ => panic!("Not an integer comparison"),
+                };
+                if alu.get_src(0).bit_size() == 64 {
+                    b.isetp64(cmp_type, cmp_op, x.into(), y.into())
                } else {
-                    b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
+                    assert!(alu.get_src(0).bit_size() == 32);
+                    b.isetp(cmp_type, cmp_op, x.into(), y.into())
                }
            }
            nir_op_imax | nir_op_imin | nir_op_umax | nir_op_umin => {
@ -881,6 +887,17 @@ impl<'a> ShaderFromNir<'a> {
                });
                dst[1].into()
            }
+            nir_op_ine => {
+                if alu.get_src(0).bit_size() == 1 {
+                    let lop = LogicOp::new_lut(&|x, y, _| x ^ y);
+                    b.lop2(lop, srcs[0], srcs[1])
+                } else if alu.get_src(0).bit_size() == 64 {
+                    b.isetp64(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
+                } else {
+                    assert!(alu.get_src(0).bit_size() == 32);
+                    b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
+                }
+            }
            nir_op_ineg => {
                if alu.def.bit_size == 64 {
                    let x = srcs[0].as_ssa().unwrap();
@ -1062,12 +1079,6 @@ impl<'a> ShaderFromNir<'a> {
                });
                dst
            }
-            nir_op_uge => {
-                b.isetp(IntCmpType::U32, IntCmpOp::Ge, srcs[0], srcs[1])
-            }
-            nir_op_ult => {
-                b.isetp(IntCmpType::U32, IntCmpOp::Lt, srcs[0], srcs[1])
-            }
            nir_op_unpack_32_2x16_split_x => {
                b.prmt(srcs[0], 0.into(), [0, 1, 4, 4])
            }
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@ -2514,12 +2514,16 @@ pub struct OpISetP {
    pub set_op: PredSetOp,
    pub cmp_op: IntCmpOp,
    pub cmp_type: IntCmpType,
+    pub ex: bool,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    #[src_type(Pred)]
    pub accum: Src,
+
+    #[src_type(Pred)]
+    pub low_cmp: Src,
 }

 impl DisplayOp for OpISetP {
@ -2528,10 +2532,16 @@ impl DisplayOp for OpISetP {
        if !self.set_op.is_trivial(&self.accum) {
            write!(f, "{}", self.set_op)?;
        }
+        if self.ex {
+            write!(f, ".ex")?;
+        }
        write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
        if !self.set_op.is_trivial(&self.accum) {
            write!(f, " {}", self.accum)?;
        }
+        if self.ex {
+            write!(f, " {}", self.low_cmp)?;
+        }
        Ok(())
    }
 }
--- a/src/nouveau/compiler/nak_spill_values.rs
+++ b/src/nouveau/compiler/nak_spill_values.rs
@ -130,8 +130,10 @@ impl Spill for SpillPred {
            set_op: PredSetOp::And,
            cmp_op: IntCmpOp::Ne,
            cmp_type: IntCmpType::U32,
+            ex: false,
            srcs: [src.into(), Src::new_zero()],
-            accum: SrcRef::True.into(),
+            accum: true.into(),
+            low_cmp: true.into(),
        })
    }
 }