nak: implement uror and urol using shf
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Reviewed-by: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39409>
This commit is contained in:
Máté Pinczel 2026-01-19 01:45:18 +01:00 committed by Marge Bot
parent 9e7f757f0f
commit e6ea2bef6b
4 changed files with 113 additions and 0 deletions

View file

@ -168,6 +168,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
lower_scmp: true,
lower_uadd_carry: true,
lower_usub_borrow: true,
has_rotate32: dev.sm >= 32,
has_iadd3: dev.sm >= 70,
has_imad32: dev.sm >= 70,
has_sdot_4x8: dev.sm >= 70,

View file

@ -241,6 +241,42 @@ pub trait SSABuilder: Builder {
dst
}
/// Emits a 32-bit unsigned rotate-left of `x` by `shift` and returns the
/// freshly allocated GPR SSA value that receives the result.
///
/// The rotate is expressed as a single `OpShf` whose `low` and `high`
/// inputs are both `x`, shifting left (`right: false`) with `wrap` enabled.
///
/// # Panics
///
/// Panics if `self.sm() < 32`.
fn urol(&mut self, x: Src, shift: Src) -> SSAValue {
    let rotated = self.alloc_ssa(RegFile::GPR);
    assert!(self.sm() >= 32);

    // The same source feeds both halves of the shift, so take the copy
    // for `low` before `x` itself is moved into `high`.
    let low = x.clone();
    let op = OpShf {
        dst: rotated.into(),
        low,
        high: x,
        shift,
        right: false,
        wrap: true,
        data_type: IntType::U32,
        // NOTE(review): presumably selects the upper 32 bits of the
        // shifted low/high pair for a left shift -- confirm against OpShf.
        dst_high: true,
    };
    self.push_op(op);

    rotated
}
/// Emits a 32-bit unsigned rotate-right of `x` by `shift` and returns the
/// freshly allocated GPR SSA value that receives the result.
///
/// The rotate is expressed as a single `OpShf` whose `low` and `high`
/// inputs are both `x`, shifting right (`right: true`) with `wrap` enabled.
///
/// # Panics
///
/// Panics if `self.sm() < 32`.
fn uror(&mut self, x: Src, shift: Src) -> SSAValue {
    let rotated = self.alloc_ssa(RegFile::GPR);
    assert!(self.sm() >= 32);

    // The same source feeds both halves of the shift, so take the copy
    // for `low` before `x` itself is moved into `high`.
    let low = x.clone();
    let op = OpShf {
        dst: rotated.into(),
        low,
        high: x,
        shift,
        right: true,
        wrap: true,
        data_type: IntType::U32,
        // NOTE(review): presumably selects the lower 32 bits of the
        // shifted low/high pair for a right shift -- confirm against OpShf.
        dst_high: false,
    };
    self.push_op(op);

    rotated
}
fn fadd(&mut self, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpFAdd {

View file

@ -1533,6 +1533,14 @@ impl<'a> ShaderFromNir<'a> {
b.shr(srcs(0), srcs(1), true).into()
}
}
nir_op_urol => {
assert!(alu.get_src(0).bit_size() == 32);
b.urol(srcs(0), srcs(1)).into()
}
nir_op_uror => {
assert!(alu.get_src(0).bit_size() == 32);
b.uror(srcs(0), srcs(1)).into()
}
nir_op_lea_nv => {
let src_a = srcs(1);
let src_b = srcs(0);

View file

@ -974,6 +974,74 @@ fn test_op_shf() {
}
}
/// Runs a one-instruction `urol` shader on the device and compares every
/// result with `u32::rotate_left`.
///
/// Each row is `[x, shift, result]`: the shader loads `x` and `shift` from
/// offsets 0 and 4, and stores the rotated value at offset 8. Shift counts
/// are unrestricted random `u32`s. The test is a no-op when the device
/// reports an SM below 32.
#[test]
fn test_op_urol() {
    let run = RunSingleton::get();
    if run.sm.sm() < 32 {
        return;
    }

    // Build the shader: load both operands, rotate, store the result.
    let mut b = TestShaderBuilder::new(&run.sm);
    let value = Src::from(b.ld_test_data(0, MemType::B32)[0]);
    let amount = Src::from(b.ld_test_data(4, MemType::B32)[0]);
    let rotated = b.urol(value, amount);
    b.st_test_data(8, MemType::B32, rotated.into());
    let bin = b.compile();

    // One row per invocation; the third slot is overwritten by the shader.
    let invocations = 100;
    let mut rng = Acorn::new();
    let mut data: Vec<_> = (0..invocations)
        .map(|_| [rng.get_u32(), rng.get_u32(), 0])
        .collect();

    run.run.run(&bin, &mut data).unwrap();

    for &[x, y, actual] in &data {
        assert_eq!(actual, x.rotate_left(y));
    }
}
/// Runs a one-instruction `uror` shader on the device and compares every
/// result with `u32::rotate_right`.
///
/// Each row is `[x, shift, result]`: the shader loads `x` and `shift` from
/// offsets 0 and 4, and stores the rotated value at offset 8. Shift counts
/// are unrestricted random `u32`s. The test is a no-op when the device
/// reports an SM below 32.
#[test]
fn test_op_uror() {
    let run = RunSingleton::get();
    if run.sm.sm() < 32 {
        return;
    }

    // Build the shader: load both operands, rotate, store the result.
    let mut b = TestShaderBuilder::new(&run.sm);
    let value = Src::from(b.ld_test_data(0, MemType::B32)[0]);
    let amount = Src::from(b.ld_test_data(4, MemType::B32)[0]);
    let rotated = b.uror(value, amount);
    b.st_test_data(8, MemType::B32, rotated.into());
    let bin = b.compile();

    // One row per invocation; the third slot is overwritten by the shader.
    let invocations = 100;
    let mut rng = Acorn::new();
    let mut data: Vec<_> = (0..invocations)
        .map(|_| [rng.get_u32(), rng.get_u32(), 0])
        .collect();

    run.run.run(&bin, &mut data).unwrap();

    for &[x, y, actual] in &data {
        assert_eq!(actual, x.rotate_right(y));
    }
}
#[test]
fn test_op_shr() {
let sm = &RunSingleton::get().sm;