mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
nak,nir: Add 64-bit lea_nv
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32517>
This commit is contained in:
parent
c92a92e72b
commit
11b8c8b8e6
5 changed files with 105 additions and 2 deletions
|
|
@ -1414,7 +1414,7 @@ opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
|
|||
|
||||
# Address arithmetic instructions: shift and add
|
||||
# Shift must be a constant.
|
||||
opcode("lea_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False,
|
||||
opcode("lea_nv", 0, tuint, [0, 0, 0], [tuint, tuint, tuint32], False,
|
||||
"", "src0 + (src1 << (src2 % bit_size))")
|
||||
|
||||
# 24b multiply into 32b result (with sign extension)
|
||||
|
|
|
|||
|
|
@ -630,6 +630,54 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn lea64(&mut self, a: Src, b: Src, shift: u8) -> SSARef {
|
||||
assert!(self.sm() >= 70);
|
||||
assert!(a.src_mod.is_none());
|
||||
assert!(b.src_mod.is_none());
|
||||
|
||||
let a = a.as_ssa().unwrap();
|
||||
let b = b.as_ssa().unwrap();
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 2);
|
||||
let shift = shift % 64;
|
||||
if shift >= 32 {
|
||||
self.copy_to(dst[0].into(), b[0].into());
|
||||
self.push_op(OpLea {
|
||||
dst: dst[1].into(),
|
||||
overflow: Dst::None,
|
||||
a: a[0].into(),
|
||||
b: b[1].into(),
|
||||
a_high: 0.into(),
|
||||
dst_high: false,
|
||||
shift: shift - 32,
|
||||
intermediate_mod: SrcMod::None,
|
||||
});
|
||||
} else {
|
||||
let carry = self.alloc_ssa(RegFile::Pred, 1);
|
||||
self.push_op(OpLea {
|
||||
dst: dst[0].into(),
|
||||
overflow: carry.into(),
|
||||
a: a[0].into(),
|
||||
b: b[0].into(),
|
||||
a_high: 0.into(),
|
||||
dst_high: false,
|
||||
shift: shift,
|
||||
intermediate_mod: SrcMod::None,
|
||||
});
|
||||
self.push_op(OpLeaX {
|
||||
dst: dst[1].into(),
|
||||
overflow: Dst::None,
|
||||
a: a[0].into(),
|
||||
b: b[1].into(),
|
||||
a_high: a[1].into(),
|
||||
carry: carry.into(),
|
||||
dst_high: true,
|
||||
shift: shift,
|
||||
intermediate_mod: SrcMod::None,
|
||||
});
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
|
||||
let dst = if x.is_predicate() {
|
||||
self.alloc_ssa(RegFile::Pred, 1)
|
||||
|
|
|
|||
|
|
@ -1482,7 +1482,11 @@ impl<'a> ShaderFromNir<'a> {
|
|||
let src_a = srcs[1];
|
||||
let src_b = srcs[0];
|
||||
let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8;
|
||||
b.lea(src_a, src_b, shift)
|
||||
match alu.def.bit_size {
|
||||
32 => b.lea(src_a, src_b, shift),
|
||||
64 => b.lea64(src_a, src_b, shift),
|
||||
x => panic!("unsupported bit size for nir_op_lea_nv: {x}"),
|
||||
}
|
||||
}
|
||||
nir_op_isub => match alu.def.bit_size {
|
||||
32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
|
||||
|
|
|
|||
|
|
@ -788,6 +788,55 @@ fn test_op_leax() {
|
|||
}
|
||||
}
|
||||
|
||||
// Hardware test for `lea64`: for every shift amount in 0..64, build a shader
// that loads two 64-bit operands (as 32-bit halves at byte offsets 0/4 and
// 8/12), stores the two halves of `lea64(x, y, shift)` at offsets 16/20, and
// compare against `(x << shift) + y` computed on the CPU with wrapping add.
#[test]
fn test_lea64() {
    let run = RunSingleton::get();

    // `lea64` asserts `sm() >= 70`, so on older hardware building the shader
    // would panic; skip instead of failing there.
    if run.sm.sm() < 70 {
        return;
    }

    let invocations = 100;

    for shift in 0..64 {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        let x = Src::from([
            b.ld_test_data(0, MemType::B32)[0],
            b.ld_test_data(4, MemType::B32)[0],
        ]);

        let y = Src::from([
            b.ld_test_data(8, MemType::B32)[0],
            b.ld_test_data(12, MemType::B32)[0],
        ]);

        let dst = b.lea64(x, y, shift);
        b.st_test_data(16, MemType::B32, dst[0].into());
        b.st_test_data(20, MemType::B32, dst[1].into());

        let bin = b.compile();

        // A fresh generator per shift value, so every shift is exercised
        // with the same input sequence.  The last two slots of each record
        // are placeholders that the shader overwrites with its result.
        let mut a = Acorn::new();
        let mut data: Vec<_> = (0..invocations)
            .map(|_| {
                [
                    get_iadd_int(&mut a),
                    get_iadd_int(&mut a),
                    get_iadd_int(&mut a),
                    get_iadd_int(&mut a),
                    0,
                    0,
                ]
            })
            .collect();

        run.run.run(&bin, &mut data).unwrap();

        for d in &data {
            let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
            let y = u64::from(d[2]) | (u64::from(d[3]) << 32);
            let dst = (x << shift).wrapping_add(y);
            assert_eq!(d[4], dst as u32);
            assert_eq!(d[5], (dst >> 32) as u32);
        }
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_op_lop2() {
|
||||
if RunSingleton::get().sm.sm() < 70 {
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@ algebraic_lowering = [
|
|||
|
||||
(('iadd(is_used_by_non_ldc_nv)', 'a@32', ('ishl', 'b@32', '#s@32')),
|
||||
('lea_nv', a, b, s), 'nak->sm >= 70'),
|
||||
(('iadd', 'a@64', ('ishl', 'b@64', '#s@32')),
|
||||
('lea_nv', a, b, s), 'nak->sm >= 70'),
|
||||
]
|
||||
|
||||
def main():
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue