nak,nir: Add 64-bit lea_nv

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32517>
This commit is contained in:
Mel Henning 2025-02-10 18:56:26 -05:00 committed by Marge Bot
parent c92a92e72b
commit 11b8c8b8e6
5 changed files with 105 additions and 2 deletions

View file

@ -1414,7 +1414,7 @@ opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
# Address arithmetic instructions: shift and add
# Shift must be a constant.
opcode("lea_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False,
opcode("lea_nv", 0, tuint, [0, 0, 0], [tuint, tuint, tuint32], False,
"", "src0 + (src1 << (src2 % bit_size))")
# 24b multiply into 32b result (with sign extension)

View file

@ -630,6 +630,54 @@ pub trait SSABuilder: Builder {
dst
}
/// Builds a 64-bit shift-and-add, `(a << shift) + b`, out of 32-bit ops.
///
/// Both sources are read as two-dword SSA values (index 0 is used as the low
/// dword, index 1 as the high dword) and the result is a freshly allocated
/// two-component GPR value laid out the same way. `shift` is reduced modulo
/// 64 before use.
///
/// # Panics
///
/// Panics if `self.sm() < 70`, if either source carries a source modifier,
/// or if either source is not an SSA value.
fn lea64(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
    assert!(self.sm() >= 70);
    assert!(a.src_mod.is_none());
    assert!(b.src_mod.is_none());

    let shifted = a.as_ssa().unwrap();
    let addend = b.as_ssa().unwrap();
    let result = self.alloc_ssa(RegFile::GPR, 2);
    let amount = shift % 64;

    match amount.checked_sub(32) {
        // amount >= 32: every bit of `a << amount` lands in the high dword,
        // so the low dword of the sum is just the low dword of `b`.
        Some(high_amount) => {
            self.copy_to(result[0].into(), addend[0].into());
            // High dword: low dword of `a` shifted by the remainder, added to
            // the high dword of `b`. No carry can come in from the low half.
            self.push_op(OpLea {
                dst: result[1].into(),
                overflow: Dst::None,
                a: shifted[0].into(),
                b: addend[1].into(),
                a_high: 0.into(),
                dst_high: false,
                shift: high_amount,
                intermediate_mod: SrcMod::None,
            });
        }
        // amount < 32: two ops chained through a predicate. The low op writes
        // its overflow output to `carry`, and the high op reads it back as its
        // carry input.
        None => {
            let carry = self.alloc_ssa(RegFile::Pred, 1);
            self.push_op(OpLea {
                dst: result[0].into(),
                overflow: carry.into(),
                a: shifted[0].into(),
                b: addend[0].into(),
                a_high: 0.into(),
                dst_high: false,
                shift: amount,
                intermediate_mod: SrcMod::None,
            });
            // NOTE(review): `dst_high: true` presumably selects the upper
            // dword of the shifted `a_high:a` pair -- confirm against OpLeaX.
            self.push_op(OpLeaX {
                dst: result[1].into(),
                overflow: Dst::None,
                a: shifted[0].into(),
                b: addend[1].into(),
                a_high: shifted[1].into(),
                carry: carry.into(),
                dst_high: true,
                shift: amount,
                intermediate_mod: SrcMod::None,
            });
        }
    }

    result
}
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
let dst = if x.is_predicate() {
self.alloc_ssa(RegFile::Pred, 1)

View file

@ -1482,7 +1482,11 @@ impl<'a> ShaderFromNir<'a> {
let src_a = srcs[1];
let src_b = srcs[0];
let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8;
b.lea(src_a, src_b, shift)
match alu.def.bit_size {
32 => b.lea(src_a, src_b, shift),
64 => b.lea64(src_a, src_b, shift),
x => panic!("unsupported bit size for nir_op_lea_nv: {x}"),
}
}
nir_op_isub => match alu.def.bit_size {
32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),

View file

@ -788,6 +788,55 @@ fn test_op_leax() {
}
}
/// Checks `lea64` against a `u64` reference for every shift amount in `0..64`.
///
/// For each shift a fresh test shader is built that loads two 64-bit operands
/// (as four 32-bit words at byte offsets 0, 4, 8 and 12), calls `lea64`, and
/// stores the two result dwords at offsets 16 and 20. Each shader is run over
/// 100 input rows generated with `get_iadd_int`, and both stored dwords are
/// compared with `(x << shift).wrapping_add(y)`.
#[test]
fn test_lea64() {
    let run = RunSingleton::get();
    let invocations = 100;

    for shift in 0..64 {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        // Operand loads, issued in the same order as the row layout below.
        let x_lo = b.ld_test_data(0, MemType::B32)[0];
        let x_hi = b.ld_test_data(4, MemType::B32)[0];
        let x = Src::from([x_lo, x_hi]);
        let y_lo = b.ld_test_data(8, MemType::B32)[0];
        let y_hi = b.ld_test_data(12, MemType::B32)[0];
        let y = Src::from([y_lo, y_hi]);

        let sum = b.lea64(x, y, shift);
        b.st_test_data(16, MemType::B32, sum[0].into());
        b.st_test_data(20, MemType::B32, sum[1].into());
        let bin = b.compile();

        // Row layout: [x_lo, x_hi, y_lo, y_hi, result_lo, result_hi]; the two
        // result slots start at zero and are filled in by the shader.
        let mut rng = Acorn::new();
        let mut data: Vec<_> = (0..invocations)
            .map(|_| {
                [
                    get_iadd_int(&mut rng),
                    get_iadd_int(&mut rng),
                    get_iadd_int(&mut rng),
                    get_iadd_int(&mut rng),
                    0,
                    0,
                ]
            })
            .collect();

        run.run.run(&bin, &mut data).unwrap();

        for &[x_lo, x_hi, y_lo, y_hi, got_lo, got_hi] in &data {
            let x = u64::from(x_lo) | (u64::from(x_hi) << 32);
            let y = u64::from(y_lo) | (u64::from(y_hi) << 32);
            let expected = (x << shift).wrapping_add(y);
            assert_eq!(got_lo, expected as u32);
            assert_eq!(got_hi, (expected >> 32) as u32);
        }
    }
}
#[test]
fn test_op_lop2() {
if RunSingleton::get().sm.sm() < 70 {

View file

@ -42,6 +42,8 @@ algebraic_lowering = [
(('iadd(is_used_by_non_ldc_nv)', 'a@32', ('ishl', 'b@32', '#s@32')),
('lea_nv', a, b, s), 'nak->sm >= 70'),
(('iadd', 'a@64', ('ishl', 'b@64', '#s@32')),
('lea_nv', a, b, s), 'nak->sm >= 70'),
]
def main():