From 11b8c8b8e672e6521e627f45c328a6533c23f77d Mon Sep 17 00:00:00 2001
From: Mel Henning
Date: Mon, 10 Feb 2025 18:56:26 -0500
Subject: [PATCH] nak,nir: Add 64-bit lea_nv

Reviewed-by: Faith Ekstrand
Part-of:
---
 src/compiler/nir/nir_opcodes.py           |  2 +-
 src/nouveau/compiler/nak/builder.rs       | 60 +++++++++++++++++++++++
 src/nouveau/compiler/nak/from_nir.rs      |  6 ++-
 src/nouveau/compiler/nak/hw_tests.rs      | 55 +++++++++++++++++++++
 src/nouveau/compiler/nak_nir_algebraic.py |  2 +
 5 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index ec4f6667f3e..619efaf64ad 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1414,7 +1414,7 @@ opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
 
 # Address arithmetic instructions: shift and add
 # Shift must be a constant.
-opcode("lea_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False,
+opcode("lea_nv", 0, tuint, [0, 0, 0], [tuint, tuint, tuint32], False,
        "", "src0 + (src1 << (src2 % bit_size))")
 
 # 24b multiply into 32b result (with sign extension)
diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index c9fb98a1be7..54a1febb8b0 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -630,6 +630,66 @@ pub trait SSABuilder: Builder {
         dst
     }
 
+    /// Emits a 64-bit shift-and-add: `(a << (shift % 64)) + b`.
+    ///
+    /// `a` and `b` are two-component SSA sources with the low dword in
+    /// component 0 and the high dword in component 1; the result is a newly
+    /// allocated GPR pair in the same layout.
+    ///
+    /// Panics if the target is older than SM70, or if either source is not
+    /// SSA or carries a source modifier.
+    fn lea64(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
+        assert!(self.sm() >= 70);
+        assert!(a.src_mod.is_none());
+        assert!(b.src_mod.is_none());
+
+        let a = a.as_ssa().unwrap();
+        let b = b.as_ssa().unwrap();
+        let dst = self.alloc_ssa(RegFile::GPR, 2);
+        let shift = shift % 64;
+        if shift >= 32 {
+            // `a << shift` has no bits left in the low dword, so the low
+            // result is just `b[0]` and the add cannot carry.
+            self.copy_to(dst[0].into(), b[0].into());
+            self.push_op(OpLea {
+                dst: dst[1].into(),
+                overflow: Dst::None,
+                a: a[0].into(),
+                b: b[1].into(),
+                a_high: 0.into(),
+                dst_high: false,
+                shift: shift - 32,
+                intermediate_mod: SrcMod::None,
+            });
+        } else {
+            // Low dword: (a[0] << shift) + b[0], keeping the carry-out.
+            let carry = self.alloc_ssa(RegFile::Pred, 1);
+            self.push_op(OpLea {
+                dst: dst[0].into(),
+                overflow: carry.into(),
+                a: a[0].into(),
+                b: b[0].into(),
+                a_high: 0.into(),
+                dst_high: false,
+                shift,
+                intermediate_mod: SrcMod::None,
+            });
+            // High dword: consumes that carry via OpLeaX.
+            self.push_op(OpLeaX {
+                dst: dst[1].into(),
+                overflow: Dst::None,
+                a: a[0].into(),
+                b: b[1].into(),
+                a_high: a[1].into(),
+                carry: carry.into(),
+                dst_high: true,
+                shift,
+                intermediate_mod: SrcMod::None,
+            });
+        }
+        dst
+    }
+
     fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
         let dst = if x.is_predicate() {
             self.alloc_ssa(RegFile::Pred, 1)
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 909a366d990..f1945f92d80 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -1482,7 +1482,11 @@ impl<'a> ShaderFromNir<'a> {
                 let src_a = srcs[1];
                 let src_b = srcs[0];
                 let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8;
-                b.lea(src_a, src_b, shift)
+                match alu.def.bit_size {
+                    32 => b.lea(src_a, src_b, shift),
+                    64 => b.lea64(src_a, src_b, shift),
+                    x => panic!("unsupported bit size for nir_op_lea_nv: {x}"),
+                }
             }
             nir_op_isub => match alu.def.bit_size {
                 32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs
index 06fd4ce7215..140bdd1d291 100644
--- a/src/nouveau/compiler/nak/hw_tests.rs
+++ b/src/nouveau/compiler/nak/hw_tests.rs
@@ -788,6 +788,61 @@ fn test_op_leax() {
     }
 }
 
+// Checks lea64() against a 64-bit CPU reference for every shift amount.
+#[test]
+fn test_lea64() {
+    let run = RunSingleton::get();
+    // lea64() asserts sm >= 70, so skip this test on older hardware.
+    if run.sm.sm() < 70 {
+        return;
+    }
+
+    let invocations = 100;
+
+    for shift in 0..64 {
+        let mut b = TestShaderBuilder::new(run.sm.as_ref());
+
+        let x = Src::from([
+            b.ld_test_data(0, MemType::B32)[0],
+            b.ld_test_data(4, MemType::B32)[0],
+        ]);
+
+        let y = Src::from([
+            b.ld_test_data(8, MemType::B32)[0],
+            b.ld_test_data(12, MemType::B32)[0],
+        ]);
+
+        let dst = b.lea64(x, y, shift);
+        b.st_test_data(16, MemType::B32, dst[0].into());
+        b.st_test_data(20, MemType::B32, dst[1].into());
+
+        let bin = b.compile();
+
+        let mut a = Acorn::new();
+        let mut data = Vec::new();
+        for _ in 0..invocations {
+            data.push([
+                get_iadd_int(&mut a),
+                get_iadd_int(&mut a),
+                get_iadd_int(&mut a),
+                get_iadd_int(&mut a),
+                0,
+                0,
+            ]);
+        }
+
+        run.run.run(&bin, &mut data).unwrap();
+
+        for d in &data {
+            let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
+            let y = u64::from(d[2]) | (u64::from(d[3]) << 32);
+            let dst = (x << shift).wrapping_add(y);
+            assert_eq!(d[4], dst as u32);
+            assert_eq!(d[5], (dst >> 32) as u32);
+        }
+    }
+}
+
 #[test]
 fn test_op_lop2() {
     if RunSingleton::get().sm.sm() < 70 {
diff --git a/src/nouveau/compiler/nak_nir_algebraic.py b/src/nouveau/compiler/nak_nir_algebraic.py
index 4f8a4004917..f1c58247c35 100644
--- a/src/nouveau/compiler/nak_nir_algebraic.py
+++ b/src/nouveau/compiler/nak_nir_algebraic.py
@@ -42,6 +42,8 @@ algebraic_lowering = [
 
     (('iadd(is_used_by_non_ldc_nv)', 'a@32', ('ishl', 'b@32', '#s@32')),
      ('lea_nv', a, b, s), 'nak->sm >= 70'),
+    (('iadd', 'a@64', ('ishl', 'b@64', '#s@32')),
+     ('lea_nv', a, b, s), 'nak->sm >= 70'),
 ]
 
 def main():