From e6ea2bef6b8e4c22a51e46f676de55dd4dfdb2ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Pinczel?=
Date: Mon, 19 Jan 2026 01:45:18 +0100
Subject: [PATCH] nak: implement uror and urol using shf

Reviewed-by: Mel Henning
Reviewed-by: Mohamed Ahmed
Part-of:
---
Notes: line structure restored.  The added lines differ from the original
posting only by comments, the `shift` field shorthand, asserting `sm()`
before `alloc_ssa()`, and dropping no-op `as u32` casts in the tests.
Context indentation is assumed to follow rustfmt, and the post-image blob
ids on the `index` lines were not regenerated.

 src/nouveau/compiler/nak/api.rs      |  1 +
 src/nouveau/compiler/nak/builder.rs  | 50 +++++++++++++++++++
 src/nouveau/compiler/nak/from_nir.rs | 10 ++++
 src/nouveau/compiler/nak/hw_tests.rs | 74 ++++++++++++++++++++++++++++
 4 files changed, 135 insertions(+)

diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs
index 74c4aa2258b..81bf4dc096e 100644
--- a/src/nouveau/compiler/nak/api.rs
+++ b/src/nouveau/compiler/nak/api.rs
@@ -168,6 +168,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
         lower_scmp: true,
         lower_uadd_carry: true,
         lower_usub_borrow: true,
+        has_rotate32: dev.sm >= 32,
         has_iadd3: dev.sm >= 70,
         has_imad32: dev.sm >= 70,
         has_sdot_4x8: dev.sm >= 70,
diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index a91f8358bf8..829898d9c9d 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -241,6 +241,56 @@ pub trait SSABuilder: Builder {
         dst
     }
 
+    /// Rotates the 32-bit value `x` left by `shift` bits.
+    ///
+    /// Emits one `OpShf` that takes `x` as both its low and high input with
+    /// `wrap` set.  Panics if the target is older than SM32.
+    fn urol(&mut self, x: Src, shift: Src) -> SSAValue {
+        // Check the hardware requirement before allocating a destination.
+        assert!(self.sm() >= 32);
+        let dst = self.alloc_ssa(RegFile::GPR);
+
+        self.push_op(OpShf {
+            dst: dst.into(),
+            low: x.clone(),
+            high: x,
+            shift,
+            right: false,
+            wrap: true,
+            data_type: IntType::U32,
+            // NOTE(review): presumably picks the upper word of the shifted
+            // {high, low} pair, which is the rotated value -- confirm.
+            dst_high: true,
+        });
+
+        dst
+    }
+
+    /// Rotates the 32-bit value `x` right by `shift` bits.
+    ///
+    /// Emits one `OpShf` that takes `x` as both its low and high input with
+    /// `wrap` set.  Panics if the target is older than SM32.
+    fn uror(&mut self, x: Src, shift: Src) -> SSAValue {
+        // Check the hardware requirement before allocating a destination.
+        assert!(self.sm() >= 32);
+        let dst = self.alloc_ssa(RegFile::GPR);
+
+        self.push_op(OpShf {
+            dst: dst.into(),
+            low: x.clone(),
+            high: x,
+            shift,
+            right: true,
+            wrap: true,
+            data_type: IntType::U32,
+            // NOTE(review): presumably picks the lower word of the shifted
+            // {high, low} pair, which is the rotated value -- confirm.
+            dst_high: false,
+        });
+
+        dst
+    }
+
     fn fadd(&mut self, x: Src, y: Src) -> SSAValue {
         let dst = self.alloc_ssa(RegFile::GPR);
         self.push_op(OpFAdd {
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index e71f5884a5d..5fc5fc156c3 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -1533,6 +1533,16 @@ impl<'a> ShaderFromNir<'a> {
                     b.shr(srcs(0), srcs(1), true).into()
                 }
             }
+            nir_op_urol => {
+                // Only the 32-bit rotate is implemented by the builder.
+                assert!(alu.get_src(0).bit_size() == 32);
+                b.urol(srcs(0), srcs(1)).into()
+            }
+            nir_op_uror => {
+                // Only the 32-bit rotate is implemented by the builder.
+                assert!(alu.get_src(0).bit_size() == 32);
+                b.uror(srcs(0), srcs(1)).into()
+            }
             nir_op_lea_nv => {
                 let src_a = srcs(1);
                 let src_b = srcs(0);
diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs
index 0bb55639aa9..e6b6eb225ac 100644
--- a/src/nouveau/compiler/nak/hw_tests.rs
+++ b/src/nouveau/compiler/nak/hw_tests.rs
@@ -974,6 +974,80 @@ fn test_op_shf() {
     }
 }
 
+/// Checks `urol` against `u32::rotate_left` on inputs drawn from `Acorn`.
+///
+/// The shift operand is an unrestricted `u32`, so the comparison is not
+/// limited to counts below 32.
+#[test]
+fn test_op_urol() {
+    let run = RunSingleton::get();
+    if run.sm.sm() < 32 {
+        // The builder asserts SM32+ for rotates; nothing to test below that.
+        return;
+    }
+
+    let mut b = TestShaderBuilder::new(&run.sm);
+    let invocations = 100;
+
+    // Per-invocation layout: [x, shift, result].
+    let x = Src::from(b.ld_test_data(0, MemType::B32)[0]);
+    let y = Src::from(b.ld_test_data(4, MemType::B32)[0]);
+
+    let dst = b.urol(x, y);
+    b.st_test_data(8, MemType::B32, dst.into());
+
+    let bin = b.compile();
+
+    let mut a = Acorn::new();
+    let mut data = Vec::new();
+    for _ in 0..invocations {
+        data.push([a.get_u32(), a.get_u32(), 0]);
+    }
+
+    run.run.run(&bin, &mut data).unwrap();
+
+    for &[x, y, dst] in &data {
+        assert_eq!(dst, x.rotate_left(y));
+    }
+}
+
+/// Checks `uror` against `u32::rotate_right` on inputs drawn from `Acorn`.
+///
+/// The shift operand is an unrestricted `u32`, so the comparison is not
+/// limited to counts below 32.
+#[test]
+fn test_op_uror() {
+    let run = RunSingleton::get();
+    if run.sm.sm() < 32 {
+        // The builder asserts SM32+ for rotates; nothing to test below that.
+        return;
+    }
+
+    let mut b = TestShaderBuilder::new(&run.sm);
+    let invocations = 100;
+
+    // Per-invocation layout: [x, shift, result].
+    let x = Src::from(b.ld_test_data(0, MemType::B32)[0]);
+    let y = Src::from(b.ld_test_data(4, MemType::B32)[0]);
+
+    let dst = b.uror(x, y);
+    b.st_test_data(8, MemType::B32, dst.into());
+
+    let bin = b.compile();
+
+    let mut a = Acorn::new();
+    let mut data = Vec::new();
+    for _ in 0..invocations {
+        data.push([a.get_u32(), a.get_u32(), 0]);
+    }
+
+    run.run.run(&bin, &mut data).unwrap();
+
+    for &[x, y, dst] in &data {
+        assert_eq!(dst, x.rotate_right(y));
+    }
+}
+
 #[test]
 fn test_op_shr() {
     let sm = &RunSingleton::get().sm;