diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs
index f9270c60055..06fd4ce7215 100644
--- a/src/nouveau/compiler/nak/hw_tests.rs
+++ b/src/nouveau/compiler/nak/hw_tests.rs
@@ -756,6 +756,42 @@ fn test_op_lea() {
     }
 }
 
+/// Runs `test_foldable_op` over the `lea.x` variants: every shift amount
+/// in 0..32, both `dst_high` settings and each modifier pair listed below.
+#[test]
+fn test_op_leax() {
+    if RunSingleton::get().sm.sm() >= 70 {
+        // (intermediate_mod, b.src_mod) pairs.  (BNot, BNot) is omitted:
+        // the SM70 encoder asserts that at most one of the two is set.
+        let src_mods = [
+            (SrcMod::None, SrcMod::None),
+            (SrcMod::BNot, SrcMod::None),
+            (SrcMod::None, SrcMod::BNot),
+        ];
+
+        for (intermediate_mod, b_mod) in src_mods {
+            for shift in 0..32 {
+                for dst_high in [false, true] {
+                    let mut op = OpLeaX {
+                        dst: Dst::None,
+                        overflow: Dst::None,
+                        a: 0.into(),
+                        b: 0.into(),
+                        a_high: 0.into(),
+                        carry: 0.into(),
+                        shift,
+                        dst_high,
+                        intermediate_mod,
+                    };
+                    op.b.src_mod = b_mod;
+
+                    test_foldable_op(op);
+                }
+            }
+        }
+    }
+}
+
 #[test]
 fn test_op_lop2() {
     if RunSingleton::get().sm.sm() < 70 {
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 2473f19901d..3085b85b9fa 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -3859,6 +3859,99 @@ impl DisplayOp for OpLea {
     }
 }
 impl_display_for_op!(OpLea);
+/// `lea.x`: shift-and-add with a carry-in predicate and a carry-out.
+///
+/// Per the `Foldable` impl: `dst = shifted + b + carry`, where `shifted` is
+/// `a << shift` or, with `dst_high`, the upper 32 bits of
+/// `(a_high:a) << shift`.  `intermediate_mod` and `b.src_mod` may each
+/// bitwise-invert their operand first.
+#[repr(C)]
+#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
+pub struct OpLeaX {
+    /// 32-bit result.
+    #[dst_type(GPR)]
+    pub dst: Dst,
+
+    /// Carry out of the 32-bit addition.
+    #[dst_type(Pred)]
+    pub overflow: Dst,
+
+    /// Value to shift (the low 32 bits when `dst_high` is set).
+    #[src_type(ALU)]
+    pub a: Src,
+
+    /// Addend.
+    #[src_type(B32)]
+    pub b: Src,
+
+    /// High 32 bits of `a`; only used when `dst_high` is set.
+    #[src_type(ALU)]
+    pub a_high: Src,
+
+    /// Carry-in predicate, added as 0 or 1.
+    #[src_type(Pred)]
+    pub carry: Src,
+
+    /// Left-shift amount; the SM70 encoding stores it in a 5-bit field.
+    pub shift: u8,
+
+    /// Select the upper 32 bits of the 64-bit shift as the shifted value.
+    pub dst_high: bool,
+
+    /// Modifier for the shifted temporary (`a << shift`).
+    pub intermediate_mod: SrcMod,
+}
+
+impl Foldable for OpLeaX {
+    fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
+        let a = f.get_u32_src(self, &self.a);
+        let b = f.get_u32_src(self, &self.b);
+        let a_high = f.get_u32_src(self, &self.a_high);
+        let carry = f.get_pred_src(self, &self.carry);
+
+        let shifted = if self.dst_high {
+            // Bits 32..64 of (a_high:a) << shift; the cast deliberately
+            // drops everything above them.
+            let wide = (u64::from(a_high) << 32) | u64::from(a);
+            (wide >> (32 - self.shift)) as u32
+        } else {
+            a << self.shift
+        };
+
+        let shifted = if self.intermediate_mod.is_bnot() {
+            !shifted
+        } else {
+            shifted
+        };
+        let b = if self.b.src_mod.is_bnot() { !b } else { b };
+
+        // Add in two steps; a carry out of either one sets `overflow`.
+        let (sum, carry_b) = shifted.overflowing_add(b);
+        let (sum, carry_c) = sum.overflowing_add(u32::from(carry));
+
+        f.set_u32_dst(self, &self.dst, sum);
+        f.set_pred_dst(self, &self.overflow, carry_b || carry_c);
+    }
+}
+
+impl DisplayOp for OpLeaX {
+    // Prints `lea.x[.hi] a shift b [a_high] carry`; `intermediate_mod` is
+    // not part of the printed form.
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "lea.x")?;
+        if self.dst_high {
+            write!(f, ".hi")?;
+        }
+        write!(f, " {} {} {}", self.a, self.shift, self.b)?;
+        if self.dst_high {
+            write!(f, " {}", self.a_high)?;
+        }
+        write!(f, " {}", self.carry)?;
+        Ok(())
+    }
+}
+impl_display_for_op!(OpLeaX);
+
 #[repr(C)]
 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
 pub struct OpLop2 {
@@ -6311,6 +6404,7 @@ pub enum Op {
     IMnMx(OpIMnMx),
     ISetP(OpISetP),
     Lea(OpLea),
+    LeaX(OpLeaX),
     Lop2(OpLop2),
     Lop3(OpLop3),
     PopC(OpPopC),
@@ -6805,6 +6899,7 @@ impl Instr {
             | Op::IMnMx(_)
             | Op::ISetP(_)
             | Op::Lea(_)
+            | Op::LeaX(_)
             | Op::Lop2(_)
             | Op::Lop3(_)
             | Op::Shf(_)
diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs
index 8f472007389..024486035ea 100644
--- a/src/nouveau/compiler/nak/opt_copy_prop.rs
+++ b/src/nouveau/compiler/nak/opt_copy_prop.rs
@@ -651,6 +651,7 @@ impl CopyPropPass {
                     !add.overflow[0].is_none() || !add.overflow[1].is_none()
                 }
                 Op::Lea(lea) => !lea.overflow.is_none(),
+                Op::LeaX(lea) => !lea.overflow.is_none(),
                 _ => false,
             };
 
diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs
index 5b273e0428e..1059dd30954 100644
--- a/src/nouveau/compiler/nak/sm70.rs
+++ b/src/nouveau/compiler/nak/sm70.rs
@@ -84,6 +84,7 @@ impl ShaderModel for ShaderModel70 {
             | Op::IMad64(_)
             | Op::ISetP(_)
             | Op::Lea(_)
+            | Op::LeaX(_)
             | Op::Lop3(_)
             | Op::Mov(_)
             | Op::PLop3(_)
@@ -1801,6 +1802,64 @@ impl SM70Op for OpLea {
     }
 }
 
+impl SM70Op for OpLeaX {
+    fn legalize(&mut self, b: &mut LegalizeBuilder) {
+        let gpr = op_gpr(self);
+        b.copy_alu_src_if_not_reg(&mut self.a, gpr, SrcType::ALU);
+        // `a_high` is only encoded for the .hi form (see `encode`).
+        if self.dst_high {
+            b.copy_alu_src_if_both_not_reg(
+                &self.b,
+                &mut self.a_high,
+                gpr,
+                SrcType::ALU,
+            );
+        }
+    }
+
+    fn encode(&self, e: &mut SM70Encoder<'_>) {
+        assert!(self.a.src_mod == SrcMod::None);
+        // At most one of the two modifiers may be set at a time.
+        assert!(
+            self.intermediate_mod == SrcMod::None
+                || self.b.src_mod == SrcMod::None
+        );
+
+        // The third source slot carries `a_high`, and only for .hi.
+        let c = if self.dst_high {
+            Some(&self.a_high)
+        } else {
+            None
+        };
+
+        if self.is_uniform() {
+            e.encode_ualu(
+                0x091,
+                Some(&self.dst),
+                Some(&self.a),
+                Some(&self.b),
+                c,
+            );
+            e.set_upred_src(87..90, 90, self.carry);
+        } else {
+            e.encode_alu(
+                0x011,
+                Some(&self.dst),
+                Some(&self.a),
+                Some(&self.b),
+                c,
+            );
+            e.set_pred_src(87..90, 90, self.carry);
+        }
+
+        e.set_bit(72, self.intermediate_mod.is_bnot());
+        e.set_field(75..80, self.shift);
+        e.set_bit(80, self.dst_high);
+        e.set_pred_dst(81..84, self.overflow);
+        e.set_bit(74, true); // .X
+    }
+}
+
 fn src_as_lop_imm(src: &Src) -> Option<bool> {
     let x = match src.src_ref {
         SrcRef::Zero => false,
@@ -3556,6 +3615,7 @@ macro_rules! as_sm70_op_match {
             Op::IMnMx(op) => op,
             Op::ISetP(op) => op,
             Op::Lea(op) => op,
+            Op::LeaX(op) => op,
             Op::Lop3(op) => op,
             Op::PopC(op) => op,
             Op::Shf(op) => op,