nak,nir: Add 64-bit lea_nv

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32517>
2025-12-20 07:20:10 +01:00 · 2025-02-10 18:56:26 -05:00 · 2025-02-10 18:56:26 -05:00 · 11b8c8b8e6
commit 11b8c8b8e6
parent c92a92e72b
5 changed files with 105 additions and 2 deletions
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@ -1414,7 +1414,7 @@ opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],

 # Address arithmetic instructions: shift and add
 # Shift must be a constant.
-opcode("lea_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False,
+opcode("lea_nv", 0, tuint, [0, 0, 0], [tuint, tuint, tuint32], False,
       "", "src0 + (src1 << (src2 % bit_size))")

 # 24b multiply into 32b result (with sign extension)
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@ -630,6 +630,54 @@ pub trait SSABuilder: Builder {
        dst
    }

+    fn lea64(&mut self, a: Src, b: Src, shift: u8) -> SSARef { // 64-bit shift-and-add; test_lea64 checks it against (a << shift).wrapping_add(b)
+        assert!(self.sm() >= 70); // only usable when the builder reports SM 70 or newer
+        assert!(a.src_mod.is_none()); // source modifiers on `a` are rejected
+        assert!(b.src_mod.is_none()); // source modifiers on `b` are rejected
+
+        let a = a.as_ssa().unwrap(); // panics unless `a` is an SSA source; used below as a[0] and a[1]
+        let b = b.as_ssa().unwrap(); // same requirement for `b`; used below as b[0] and b[1]
+        let dst = self.alloc_ssa(RegFile::GPR, 2); // two GPR components; test_lea64 reads dst[0] as the low word and dst[1] as the high word
+        let shift = shift % 64; // wrap the shift amount into 0..=63
+        if shift >= 32 { // (a << shift) contributes nothing below bit 32 in this case
+            self.copy_to(dst[0].into(), b[0].into()); // so the low result word is simply b[0]
+            self.push_op(OpLea { // single 32-bit LEA producing the high result word
+                dst: dst[1].into(),
+                overflow: Dst::None, // overflow output is not needed here
+                a: a[0].into(), // only a's low word can reach the high result word; a[1] is shifted out
+                b: b[1].into(), // added to b's high word
+                a_high: 0.into(),
+                dst_high: false,
+                shift: shift - 32, // remaining shift after the implicit 32-bit move into the high word
+                intermediate_mod: SrcMod::None,
+            });
+        } else { // shift < 32: two-instruction sequence linked through a predicate
+            let carry = self.alloc_ssa(RegFile::Pred, 1); // predicate written by the first op and read by the second
+            self.push_op(OpLea { // low result word from a[0] and b[0]
+                dst: dst[0].into(),
+                overflow: carry.into(), // overflow of the low-word operation is captured in `carry`
+                a: a[0].into(),
+                b: b[0].into(),
+                a_high: 0.into(),
+                dst_high: false,
+                shift: shift,
+                intermediate_mod: SrcMod::None,
+            });
+            self.push_op(OpLeaX { // high result word, taking `carry` as an input
+                dst: dst[1].into(),
+                overflow: Dst::None, // overflow out of the high word is discarded
+                a: a[0].into(),
+                b: b[1].into(),
+                a_high: a[1].into(), // a's high word is supplied alongside a[0]
+                carry: carry.into(), // consumes the predicate produced by the OpLea above
+                dst_high: true, // presumably selects the upper half of the shifted 64-bit value; confirm against the OpLeaX definition
+                shift: shift,
+                intermediate_mod: SrcMod::None,
+            });
+        }
+        dst // both components are returned as one SSARef
+    }
+
    fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
        let dst = if x.is_predicate() {
            self.alloc_ssa(RegFile::Pred, 1)
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@ -1482,7 +1482,11 @@ impl<'a> ShaderFromNir<'a> {
                let src_a = srcs[1];
                let src_b = srcs[0];
                let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8;
-                b.lea(src_a, src_b, shift)
+                match alu.def.bit_size {
+                    32 => b.lea(src_a, src_b, shift),
+                    64 => b.lea64(src_a, src_b, shift),
+                    x => panic!("unsupported bit size for nir_op_lea_nv: {x}"),
+                }
            }
            nir_op_isub => match alu.def.bit_size {
                32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
--- a/src/nouveau/compiler/nak/hw_tests.rs
+++ b/src/nouveau/compiler/nak/hw_tests.rs
@ -788,6 +788,55 @@ fn test_op_leax() {
    }
 }

+#[test]
+fn test_lea64() { // runs lea64 in a test shader and compares it with a host-side u64 reference for every shift
+    let run = RunSingleton::get(); // NOTE(review): no SM check here although lea64 asserts sm >= 70, while test_op_lop2 below returns early on sm < 70; confirm this is intended
+    let invocations = 100; // number of data rows generated per shift value
+
+    for shift in 0..64 { // a separate shader is built and run for each shift amount
+        let mut b = TestShaderBuilder::new(run.sm.as_ref());
+
+        let x = Src::from([ // first operand, assembled from two B32 loads
+            b.ld_test_data(0, MemType::B32)[0], // offset 0: low word of x
+            b.ld_test_data(4, MemType::B32)[0], // offset 4: high word of x
+        ]);
+
+        let y = Src::from([ // second operand, assembled the same way
+            b.ld_test_data(8, MemType::B32)[0], // offset 8: low word of y
+            b.ld_test_data(12, MemType::B32)[0], // offset 12: high word of y
+        ]);
+
+        let dst = b.lea64(x, y, shift); // x is the shifted operand and y is the addend
+        b.st_test_data(16, MemType::B32, dst[0].into()); // low result word is checked as d[4] below
+        b.st_test_data(20, MemType::B32, dst[1].into()); // high result word is checked as d[5] below
+
+        let bin = b.compile();
+
+        let mut a = Acorn::new(); // fresh generator per shift; presumably deterministic, confirm against Acorn
+        let mut data = Vec::new();
+        for _ in 0..invocations {
+            data.push([ // one row: four input words followed by two result slots
+                get_iadd_int(&mut a), // x low
+                get_iadd_int(&mut a), // x high
+                get_iadd_int(&mut a), // y low
+                get_iadd_int(&mut a), // y high
+                0, // slot for dst[0]
+                0, // slot for dst[1]
+            ]);
+        }
+
+        run.run.run(&bin, &mut data).unwrap(); // executes the shader over `data`; results are read back from the same rows
+
+        for d in &data {
+            let x = u64::from(d[0]) | (u64::from(d[1]) << 32); // rebuild the 64-bit x from its two words
+            let y = u64::from(d[2]) | (u64::from(d[3]) << 32); // rebuild the 64-bit y from its two words
+            let dst = (x << shift).wrapping_add(y); // reference result, wrapping at 64 bits
+            assert_eq!(d[4], dst as u32); // low word must match
+            assert_eq!(d[5], (dst >> 32) as u32); // high word must match
+        }
+    }
+}
+
 #[test]
 fn test_op_lop2() {
    if RunSingleton::get().sm.sm() < 70 {
--- a/src/nouveau/compiler/nak_nir_algebraic.py
+++ b/src/nouveau/compiler/nak_nir_algebraic.py
@ -42,6 +42,8 @@ algebraic_lowering = [

    (('iadd(is_used_by_non_ldc_nv)', 'a@32', ('ishl', 'b@32', '#s@32')),
        ('lea_nv', a, b, s), 'nak->sm >= 70'),
+    (('iadd', 'a@64', ('ishl', 'b@64', '#s@32')),
+        ('lea_nv', a, b, s), 'nak->sm >= 70'),
 ]

 def main():