mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
nak: Implement nir_op_iadd3 on SM70+
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27159>
This commit is contained in:
parent
0c95d39309
commit
1881d97c27
3 changed files with 44 additions and 17 deletions
|
|
@ -139,6 +139,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
|
|||
op.lower_scmp = true;
|
||||
op.lower_uadd_carry = true;
|
||||
op.lower_usub_borrow = true;
|
||||
op.has_iadd3 = dev.sm >= 70;
|
||||
op.has_sdot_4x8 = dev.sm >= 70;
|
||||
op.has_udot_4x8 = dev.sm >= 70;
|
||||
op.has_sudot_4x8 = dev.sm >= 70;
|
||||
|
|
|
|||
|
|
@ -250,15 +250,16 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn iadd(&mut self, x: Src, y: Src) -> SSARef {
|
||||
fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSARef {
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 1);
|
||||
if self.sm() >= 70 {
|
||||
self.push_op(OpIAdd3 {
|
||||
dst: dst.into(),
|
||||
srcs: [Src::new_zero(), x, y],
|
||||
srcs: [x, y, z],
|
||||
overflow: [Dst::None; 2],
|
||||
});
|
||||
} else {
|
||||
assert!(z.is_zero());
|
||||
self.push_op(OpIAdd2 {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y],
|
||||
|
|
@ -269,24 +270,44 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn iadd64(&mut self, x: Src, y: Src) -> SSARef {
|
||||
fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
|
||||
let x = x.as_ssa().unwrap();
|
||||
let y = y.as_ssa().unwrap();
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 2);
|
||||
if self.sm() >= 70 {
|
||||
let carry = self.alloc_ssa(RegFile::Pred, 1);
|
||||
self.push_op(OpIAdd3 {
|
||||
dst: dst[0].into(),
|
||||
overflow: [carry.into(), Dst::None],
|
||||
srcs: [x[0].into(), y[0].into(), 0.into()],
|
||||
});
|
||||
self.push_op(OpIAdd3X {
|
||||
dst: dst[1].into(),
|
||||
overflow: [Dst::None, Dst::None],
|
||||
srcs: [x[1].into(), y[1].into(), 0.into()],
|
||||
carry: [carry.into(), false.into()],
|
||||
});
|
||||
if let Some(z) = z.as_ssa() {
|
||||
let carry = [
|
||||
self.alloc_ssa(RegFile::Pred, 1),
|
||||
self.alloc_ssa(RegFile::Pred, 1),
|
||||
];
|
||||
self.push_op(OpIAdd3 {
|
||||
dst: dst[0].into(),
|
||||
overflow: [carry[0].into(), carry[1].into()],
|
||||
srcs: [x[0].into(), y[0].into(), z[0].into()],
|
||||
});
|
||||
self.push_op(OpIAdd3X {
|
||||
dst: dst[1].into(),
|
||||
overflow: [Dst::None, Dst::None],
|
||||
srcs: [x[1].into(), y[1].into(), z[1].into()],
|
||||
carry: [carry[0].into(), carry[1].into()],
|
||||
});
|
||||
} else {
|
||||
assert!(z.is_zero());
|
||||
let carry = self.alloc_ssa(RegFile::Pred, 1);
|
||||
self.push_op(OpIAdd3 {
|
||||
dst: dst[0].into(),
|
||||
overflow: [carry.into(), Dst::None],
|
||||
srcs: [x[0].into(), y[0].into(), 0.into()],
|
||||
});
|
||||
self.push_op(OpIAdd3X {
|
||||
dst: dst[1].into(),
|
||||
overflow: [Dst::None, Dst::None],
|
||||
srcs: [x[1].into(), y[1].into(), 0.into()],
|
||||
carry: [carry.into(), false.into()],
|
||||
});
|
||||
}
|
||||
} else {
|
||||
assert!(z.is_zero());
|
||||
let carry = self.alloc_ssa(RegFile::Carry, 1);
|
||||
self.push_op(OpIAdd2 {
|
||||
dst: dst[0].into(),
|
||||
|
|
|
|||
|
|
@ -1035,10 +1035,15 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_op_iabs => b.iabs(srcs[0]),
|
||||
nir_op_iadd => match alu.def.bit_size {
|
||||
32 => b.iadd(srcs[0], srcs[1]),
|
||||
64 => b.iadd64(srcs[0], srcs[1]),
|
||||
32 => b.iadd(srcs[0], srcs[1], 0.into()),
|
||||
64 => b.iadd64(srcs[0], srcs[1], 0.into()),
|
||||
x => panic!("unsupported bit size for nir_op_iadd: {x}"),
|
||||
},
|
||||
nir_op_iadd3 => match alu.def.bit_size {
|
||||
32 => b.iadd(srcs[0], srcs[1], srcs[2]),
|
||||
64 => b.iadd64(srcs[0], srcs[1], srcs[2]),
|
||||
x => panic!("unsupported bit size for nir_op_iadd3: {x}"),
|
||||
},
|
||||
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]),
|
||||
nir_op_ieq => {
|
||||
if alu.get_src(0).bit_size() == 1 {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue