From 1881d97c2743a6b999942aeaae94176d4a1962bb Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 18 Jan 2024 16:34:48 -0600 Subject: [PATCH] nak: Implement nir_op_iadd3 on SM70+ Part-of: --- src/nouveau/compiler/nak/api.rs | 1 + src/nouveau/compiler/nak/builder.rs | 51 ++++++++++++++++++++-------- src/nouveau/compiler/nak/from_nir.rs | 9 +++-- 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 766c76af43c..d469ee6ba1f 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -139,6 +139,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options { op.lower_scmp = true; op.lower_uadd_carry = true; op.lower_usub_borrow = true; + op.has_iadd3 = dev.sm >= 70; op.has_sdot_4x8 = dev.sm >= 70; op.has_udot_4x8 = dev.sm >= 70; op.has_sudot_4x8 = dev.sm >= 70; diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs index 60651e6f1c4..19ea7f11a91 100644 --- a/src/nouveau/compiler/nak/builder.rs +++ b/src/nouveau/compiler/nak/builder.rs @@ -250,15 +250,16 @@ pub trait SSABuilder: Builder { dst } - fn iadd(&mut self, x: Src, y: Src) -> SSARef { + fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSARef { let dst = self.alloc_ssa(RegFile::GPR, 1); if self.sm() >= 70 { self.push_op(OpIAdd3 { dst: dst.into(), - srcs: [Src::new_zero(), x, y], + srcs: [x, y, z], overflow: [Dst::None; 2], }); } else { + assert!(z.is_zero()); self.push_op(OpIAdd2 { dst: dst.into(), srcs: [x, y], @@ -269,24 +270,44 @@ pub trait SSABuilder: Builder { dst } - fn iadd64(&mut self, x: Src, y: Src) -> SSARef { + fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef { let x = x.as_ssa().unwrap(); let y = y.as_ssa().unwrap(); let dst = self.alloc_ssa(RegFile::GPR, 2); if self.sm() >= 70 { - let carry = self.alloc_ssa(RegFile::Pred, 1); - self.push_op(OpIAdd3 { - dst: dst[0].into(), - overflow: [carry.into(), Dst::None], - srcs: [x[0].into(), y[0].into(), 0.into()], - }); - self.push_op(OpIAdd3X { - dst: dst[1].into(), - overflow: [Dst::None, Dst::None], - srcs: [x[1].into(), y[1].into(), 0.into()], - carry: [carry.into(), false.into()], - }); + if let Some(z) = z.as_ssa() { + let carry = [ + self.alloc_ssa(RegFile::Pred, 1), + self.alloc_ssa(RegFile::Pred, 1), + ]; + self.push_op(OpIAdd3 { + dst: dst[0].into(), + overflow: [carry[0].into(), carry[1].into()], + srcs: [x[0].into(), y[0].into(), z[0].into()], + }); + self.push_op(OpIAdd3X { + dst: dst[1].into(), + overflow: [Dst::None, Dst::None], + srcs: [x[1].into(), y[1].into(), z[1].into()], + carry: [carry[0].into(), carry[1].into()], + }); + } else { + assert!(z.is_zero()); + let carry = self.alloc_ssa(RegFile::Pred, 1); + self.push_op(OpIAdd3 { + dst: dst[0].into(), + overflow: [carry.into(), Dst::None], + srcs: [x[0].into(), y[0].into(), 0.into()], + }); + self.push_op(OpIAdd3X { + dst: dst[1].into(), + overflow: [Dst::None, Dst::None], + srcs: [x[1].into(), y[1].into(), 0.into()], + carry: [carry.into(), false.into()], + }); + } } else { + assert!(z.is_zero()); let carry = self.alloc_ssa(RegFile::Carry, 1); self.push_op(OpIAdd2 { dst: dst[0].into(), diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 538af443621..2603b95a20e 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -1035,10 +1035,15 @@ impl<'a> ShaderFromNir<'a> { } nir_op_iabs => b.iabs(srcs[0]), nir_op_iadd => match alu.def.bit_size { - 32 => b.iadd(srcs[0], srcs[1]), - 64 => b.iadd64(srcs[0], srcs[1]), + 32 => b.iadd(srcs[0], srcs[1], 0.into()), + 64 => b.iadd64(srcs[0], srcs[1], 0.into()), x => panic!("unsupported bit size for nir_op_iadd: {x}"), }, + nir_op_iadd3 => match alu.def.bit_size { + 32 => b.iadd(srcs[0], srcs[1], srcs[2]), + 64 => b.iadd64(srcs[0], srcs[1], srcs[2]), + x => panic!("unsupported bit size for nir_op_iadd3: {x}"), + }, nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]), nir_op_ieq => { if alu.get_src(0).bit_size() == 1 {