diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index 4df90b4cb44..9673aef5212 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -372,52 +372,71 @@ pub trait SSABuilder: Builder {
     }
 
+    /// Emits a 64-bit integer add of `x`, `y` and `z`.
+    ///
+    /// Each source must be `SrcRef::Zero` or an SSA value whose components
+    /// 0 and 1 are the low and high dwords.  An integer-negate modifier on
+    /// an SSA source is honored; any other kind of source panics.  On SMs
+    /// older than 70, `z` must be zero.
+    ///
+    /// Returns a freshly allocated two-component GPR value.
     fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
-        let x = x.as_ssa().unwrap();
-        let y = y.as_ssa().unwrap();
+        // Splits a 64-bit source into [low, high] 32-bit sources.  A negated
+        // SSA source becomes ineg on the low dword and bnot on the high one.
+        fn split_iadd64_src(src: Src) -> [Src; 2] {
+            match src.src_ref {
+                SrcRef::Zero => [0.into(), 0.into()],
+                SrcRef::SSA(ssa) => {
+                    if src.src_mod.is_ineg() {
+                        [Src::from(ssa[0]).ineg(), Src::from(ssa[1]).bnot()]
+                    } else {
+                        [Src::from(ssa[0]), Src::from(ssa[1])]
+                    }
+                }
+                _ => panic!("Unsupported iadd64 source"),
+            }
+        }
+
+        // NOTE(review): with two negated sources and a zero third source
+        // only one carry predicate is allocated; confirm that suffices.
+        let is_3src = !x.is_zero() && !y.is_zero() && !z.is_zero();
+
+        let x = split_iadd64_src(x);
+        let y = split_iadd64_src(y);
         let dst = self.alloc_ssa(RegFile::GPR, 2);
         if self.sm() >= 70 {
-            if let Some(z) = z.as_ssa() {
-                let carry = [
-                    self.alloc_ssa(RegFile::Pred, 1),
-                    self.alloc_ssa(RegFile::Pred, 1),
-                ];
-                self.push_op(OpIAdd3 {
-                    dst: dst[0].into(),
-                    overflow: [carry[0].into(), carry[1].into()],
-                    srcs: [x[0].into(), y[0].into(), z[0].into()],
-                });
-                self.push_op(OpIAdd3X {
-                    dst: dst[1].into(),
-                    overflow: [Dst::None, Dst::None],
-                    srcs: [x[1].into(), y[1].into(), z[1].into()],
-                    carry: [carry[0].into(), carry[1].into()],
-                });
+            let carry1 = self.alloc_ssa(RegFile::Pred, 1);
+            let (carry2_dst, carry2_src) = if is_3src {
+                let carry2 = self.alloc_ssa(RegFile::Pred, 1);
+                (carry2.into(), carry2.into())
             } else {
-                assert!(z.is_zero());
-                let carry = self.alloc_ssa(RegFile::Pred, 1);
-                self.push_op(OpIAdd3 {
-                    dst: dst[0].into(),
-                    overflow: [carry.into(), Dst::None],
-                    srcs: [x[0].into(), y[0].into(), 0.into()],
-                });
-                self.push_op(OpIAdd3X {
-                    dst: dst[1].into(),
-                    overflow: [Dst::None, Dst::None],
-                    srcs: [x[1].into(), y[1].into(), 0.into()],
-                    carry: [carry.into(), false.into()],
-                });
-            }
+                // If one of the sources is known to be zero, we only need one
+                // carry predicate.
+                (Dst::None, false.into())
+            };
+
+            let z = split_iadd64_src(z);
+            self.push_op(OpIAdd3 {
+                dst: dst[0].into(),
+                overflow: [carry1.into(), carry2_dst],
+                srcs: [x[0], y[0], z[0]],
+            });
+            self.push_op(OpIAdd3X {
+                dst: dst[1].into(),
+                overflow: [Dst::None, Dst::None],
+                srcs: [x[1], y[1], z[1]],
+                carry: [carry1.into(), carry2_src],
+            });
         } else {
             assert!(z.is_zero());
             let carry = self.alloc_ssa(RegFile::Carry, 1);
             self.push_op(OpIAdd2 {
                 dst: dst[0].into(),
-                srcs: [x[0].into(), y[0].into()],
+                srcs: [x[0], y[0]],
                 carry_out: carry.into(),
             });
             self.push_op(OpIAdd2X {
                 dst: dst[1].into(),
-                srcs: [x[1].into(), y[1].into()],
+                srcs: [x[1], y[1]],
                 carry_out: Dst::None,
                 carry_in: carry.into(),
             });
@@ -499,36 +518,8 @@ pub trait SSABuilder: Builder {
     }
 
+    /// Returns the 64-bit negation of `x`, computed as `0 + (-x) + 0`.
     fn ineg64(&mut self, x: Src) -> SSARef {
-        let x = x.as_ssa().unwrap();
-        let dst = self.alloc_ssa(RegFile::GPR, 2);
-        if self.sm() >= 70 {
-            let carry = self.alloc_ssa(RegFile::Pred, 1);
-            self.push_op(OpIAdd3 {
-                dst: dst[0].into(),
-                overflow: [carry.into(), Dst::None],
-                srcs: [0.into(), Src::from(x[0]).ineg(), 0.into()],
-            });
-            self.push_op(OpIAdd3X {
-                dst: dst[1].into(),
-                overflow: [Dst::None, Dst::None],
-                srcs: [0.into(), Src::from(x[1]).bnot(), 0.into()],
-                carry: [carry.into(), SrcRef::False.into()],
-            });
-        } else {
-            let carry = self.alloc_ssa(RegFile::Carry, 1);
-            self.push_op(OpIAdd2 {
-                dst: dst[0].into(),
-                srcs: [0.into(), Src::from(x[0]).ineg()],
-                carry_out: carry.into(),
-            });
-            self.push_op(OpIAdd2X {
-                dst: dst[1].into(),
-                srcs: [0.into(), Src::from(x[1]).bnot()],
-                carry_out: Dst::None,
-                carry_in: carry.into(),
-            });
-        }
-        dst
+        self.iadd64(0.into(), x.ineg(), 0.into())
     }
 
     fn isetp(
diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs
index 0794d530ade..0e2adf3d3d2 100644
--- a/src/nouveau/compiler/nak/hw_tests.rs
+++ b/src/nouveau/compiler/nak/hw_tests.rs
@@ -707,35 +707,56 @@ fn test_iadd64() {
     let run = RunSingleton::get();
     let invocations = 100;
 
-    let mut b = TestShaderBuilder::new(run.sm.as_ref());
+    // Source modifier pairs (x, y); one shader is built and run per pair.
+    let cases = [
+        (SrcMod::None, SrcMod::None),
+        (SrcMod::INeg, SrcMod::None),
+        (SrcMod::None, SrcMod::INeg),
+    ];
 
-    let x = SSARef::from([
-        b.ld_test_data(0, MemType::B32)[0],
-        b.ld_test_data(4, MemType::B32)[0],
-    ]);
-    let y = SSARef::from([
-        b.ld_test_data(8, MemType::B32)[0],
-        b.ld_test_data(12, MemType::B32)[0],
-    ]);
-    let dst = b.iadd64(x.into(), y.into(), 0.into());
-    b.st_test_data(16, MemType::B32, dst[0].into());
-    b.st_test_data(20, MemType::B32, dst[1].into());
+    for (x_mod, y_mod) in cases {
+        let mut b = TestShaderBuilder::new(run.sm.as_ref());
 
-    let bin = b.compile();
+        let mut x = Src::from([
+            b.ld_test_data(0, MemType::B32)[0],
+            b.ld_test_data(4, MemType::B32)[0],
+        ]);
+        x.src_mod = x_mod;
 
-    let mut a = Acorn::new();
-    let mut data = Vec::new();
-    for _ in 0..invocations {
-        data.push([a.get_u32(), a.get_u32(), a.get_u32(), a.get_u32(), 0, 0]);
-    }
+        let mut y = Src::from([
+            b.ld_test_data(8, MemType::B32)[0],
+            b.ld_test_data(12, MemType::B32)[0],
+        ]);
+        y.src_mod = y_mod;
 
-    run.run.run(&bin, &mut data).unwrap();
+        let dst = b.iadd64(x, y, 0.into());
+        b.st_test_data(16, MemType::B32, dst[0].into());
+        b.st_test_data(20, MemType::B32, dst[1].into());
 
-    for d in &data {
-        let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
-        let y = u64::from(d[2]) | (u64::from(d[3]) << 32);
-        let dst = x.wrapping_add(y);
-        assert_eq!(d[4], dst as u32);
-        assert_eq!(d[5], (dst >> 32) as u32);
+        let bin = b.compile();
+
+        let mut a = Acorn::new();
+        let mut data = Vec::new();
+        for _ in 0..invocations {
+            data.push([a.get_u32(), a.get_u32(), a.get_u32(), a.get_u32(), 0, 0]);
+        }
+
+        run.run.run(&bin, &mut data).unwrap();
+
+        for d in &data {
+            let mut x = u64::from(d[0]) | (u64::from(d[1]) << 32);
+            let mut y = u64::from(d[2]) | (u64::from(d[3]) << 32);
+            // Mirror the source modifiers in the CPU reference.  Use
+            // wrapping_neg so 1 << 63 cannot trip an overflow panic.
+            if x_mod.is_ineg() {
+                x = x.wrapping_neg();
+            }
+            if y_mod.is_ineg() {
+                y = y.wrapping_neg();
+            }
+            let dst = x.wrapping_add(y);
+            assert_eq!(d[4], dst as u32);
+            assert_eq!(d[5], (dst >> 32) as u32);
+        }
     }
 }