nak: Implement nir_op_iadd3 on SM70+

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27159>
This commit is contained in:
Faith Ekstrand 2024-01-18 16:34:48 -06:00
parent 0c95d39309
commit 1881d97c27
3 changed files with 44 additions and 17 deletions

View file

@ -139,6 +139,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
op.lower_scmp = true;
op.lower_uadd_carry = true;
op.lower_usub_borrow = true;
op.has_iadd3 = dev.sm >= 70;
op.has_sdot_4x8 = dev.sm >= 70;
op.has_udot_4x8 = dev.sm >= 70;
op.has_sudot_4x8 = dev.sm >= 70;

View file

@ -250,15 +250,16 @@ pub trait SSABuilder: Builder {
dst
}
fn iadd(&mut self, x: Src, y: Src) -> SSARef {
fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSARef {
let dst = self.alloc_ssa(RegFile::GPR, 1);
if self.sm() >= 70 {
self.push_op(OpIAdd3 {
dst: dst.into(),
srcs: [Src::new_zero(), x, y],
srcs: [x, y, z],
overflow: [Dst::None; 2],
});
} else {
assert!(z.is_zero());
self.push_op(OpIAdd2 {
dst: dst.into(),
srcs: [x, y],
@ -269,24 +270,44 @@ pub trait SSABuilder: Builder {
dst
}
fn iadd64(&mut self, x: Src, y: Src) -> SSARef {
fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
let x = x.as_ssa().unwrap();
let y = y.as_ssa().unwrap();
let dst = self.alloc_ssa(RegFile::GPR, 2);
if self.sm() >= 70 {
let carry = self.alloc_ssa(RegFile::Pred, 1);
self.push_op(OpIAdd3 {
dst: dst[0].into(),
overflow: [carry.into(), Dst::None],
srcs: [x[0].into(), y[0].into(), 0.into()],
});
self.push_op(OpIAdd3X {
dst: dst[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1].into(), y[1].into(), 0.into()],
carry: [carry.into(), false.into()],
});
if let Some(z) = z.as_ssa() {
let carry = [
self.alloc_ssa(RegFile::Pred, 1),
self.alloc_ssa(RegFile::Pred, 1),
];
self.push_op(OpIAdd3 {
dst: dst[0].into(),
overflow: [carry[0].into(), carry[1].into()],
srcs: [x[0].into(), y[0].into(), z[0].into()],
});
self.push_op(OpIAdd3X {
dst: dst[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1].into(), y[1].into(), z[1].into()],
carry: [carry[0].into(), carry[1].into()],
});
} else {
assert!(z.is_zero());
let carry = self.alloc_ssa(RegFile::Pred, 1);
self.push_op(OpIAdd3 {
dst: dst[0].into(),
overflow: [carry.into(), Dst::None],
srcs: [x[0].into(), y[0].into(), 0.into()],
});
self.push_op(OpIAdd3X {
dst: dst[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1].into(), y[1].into(), 0.into()],
carry: [carry.into(), false.into()],
});
}
} else {
assert!(z.is_zero());
let carry = self.alloc_ssa(RegFile::Carry, 1);
self.push_op(OpIAdd2 {
dst: dst[0].into(),

View file

@ -1035,10 +1035,15 @@ impl<'a> ShaderFromNir<'a> {
}
nir_op_iabs => b.iabs(srcs[0]),
nir_op_iadd => match alu.def.bit_size {
32 => b.iadd(srcs[0], srcs[1]),
64 => b.iadd64(srcs[0], srcs[1]),
32 => b.iadd(srcs[0], srcs[1], 0.into()),
64 => b.iadd64(srcs[0], srcs[1], 0.into()),
x => panic!("unsupported bit size for nir_op_iadd: {x}"),
},
nir_op_iadd3 => match alu.def.bit_size {
32 => b.iadd(srcs[0], srcs[1], srcs[2]),
64 => b.iadd64(srcs[0], srcs[1], srcs[2]),
x => panic!("unsupported bit size for nir_op_iadd3: {x}"),
},
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]),
nir_op_ieq => {
if alu.get_src(0).bit_size() == 1 {