diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index 8426ce2b048..abedf4b64bd 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -186,11 +186,22 @@ pub trait SSABuilder: Builder {
 
     fn iadd(&mut self, x: Src, y: Src) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
-        self.push_op(OpIAdd3 {
-            dst: dst.into(),
-            srcs: [Src::new_zero(), x, y],
-            overflow: [Dst::None; 2],
-        });
+        // SM70+ gets the three-source OpIAdd3 with a zero first source;
+        // older SMs get the two-source OpIAdd2 with both carry flags off.
+        if self.sm() >= 70 {
+            self.push_op(OpIAdd3 {
+                dst: dst.into(),
+                srcs: [Src::new_zero(), x, y],
+                overflow: [Dst::None; 2],
+            });
+        } else {
+            self.push_op(OpIAdd2 {
+                dst: dst.into(),
+                srcs: [x, y],
+                carry_in: false,
+                carry_out: false,
+            });
+        }
         dst
     }
 
diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 477c7702068..a6058e37558 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -1614,25 +1614,26 @@ impl SM50Instr {
         self.set_field(10..12, (src_type.bits() / 8).ilog2());
     }
 
-    fn encode_iadd3(&mut self, op: &OpIAdd3) {
+    /// Encodes `OpIAdd2` with opcode 0x10.  In the register and imm24 forms
+    /// the negate modifiers are bit 49 (src0) and bit 48 (src1); the carry
+    /// bits are placed according to the form `encode_alu` reports back.
+    fn encode_iadd2(&mut self, op: &OpIAdd2) {
+        // TODO: support modifiers with imm32 (bit 56)
         assert!(op.srcs[0].is_reg_or_zero());
         let src_modifier = Some(ALUSrcsModifier {
             src0_opt: Some(ALUModifierInfo {
                 abs_bit: None,
-                neg_bit: Some(51),
+                neg_bit: Some(49),
             }),
             src1_opt: Some(ALUModifierInfo {
                 abs_bit: None,
-                neg_bit: Some(50),
-            }),
-            src2_opt: Some(ALUModifierInfo {
-                abs_bit: None,
-                neg_bit: Some(49),
+                neg_bit: Some(48),
             }),
+            src2_opt: None,
         });
 
         let encoding_info = ALUEncodingInfo {
-            opcode: 0xc0,
+            opcode: 0x10,
             encoding_type: ALUEncodingType::Variant4,
             reg_modifier: src_modifier,
             imm24_modifier: src_modifier,
@@ -1643,13 +1644,23 @@ impl SM50Instr {
             }),
         };
 
-        self.encode_alu(
+        let is_imm32 = self.encode_alu(
             encoding_info,
             Some(op.dst),
             ALUSrc::from_src(&op.srcs[0]),
             ALUSrc::from_src(&op.srcs[1]),
-            ALUSrc::from_src(&op.srcs[2]),
+            ALUSrc::None,
         );
+
+        // The carry-in/carry-out flags use different bit positions in the
+        // imm32 form (53/52) than in the other forms (43/47).
+        if is_imm32 {
+            self.set_bit(53, op.carry_in);
+            self.set_bit(52, op.carry_out);
+        } else {
+            self.set_bit(43, op.carry_in);
+            self.set_bit(47, op.carry_out);
+        }
     }
 
     fn encode_prmt(&mut self, op: &OpPrmt) {
@@ -1698,7 +1709,7 @@ impl SM50Instr {
             Op::FSetP(op) => si.encode_fsetp(&op),
             Op::MuFu(op) => si.encode_mufu(&op),
             Op::IAbs(op) => si.encode_iabs(&op),
-            Op::IAdd3(op) => si.encode_iadd3(&op),
+            Op::IAdd2(op) => si.encode_iadd2(&op),
             Op::Mov(op) => si.encode_mov(&op),
             Op::Sel(op) => si.encode_sel(&op),
             Op::PSetP(op) => si.encode_psetp(&op),
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index e86178c3c26..4dc677aa3b9 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -757,17 +757,35 @@ impl<'a> ShaderFromNir<'a> {
                     let y = srcs[1].as_ssa().unwrap();
                     let sum = b.alloc_ssa(RegFile::GPR, 2);
                     let carry = b.alloc_ssa(RegFile::Pred, 1);
-                    b.push_op(OpIAdd3 {
-                        dst: sum[0].into(),
-                        overflow: [carry.into(), Dst::None],
-                        srcs: [x[0].into(), y[0].into(), 0.into()],
-                    });
-                    b.push_op(OpIAdd3X {
-                        dst: sum[1].into(),
-                        overflow: [Dst::None, Dst::None],
-                        srcs: [x[1].into(), y[1].into(), 0.into()],
-                        carry: [carry.into(), SrcRef::False.into()],
-                    });
+                    if self.info.sm >= 70 {
+                        b.push_op(OpIAdd3 {
+                            dst: sum[0].into(),
+                            overflow: [carry.into(), Dst::None],
+                            srcs: [x[0].into(), y[0].into(), 0.into()],
+                        });
+                        b.push_op(OpIAdd3X {
+                            dst: sum[1].into(),
+                            overflow: [Dst::None, Dst::None],
+                            srcs: [x[1].into(), y[1].into(), 0.into()],
+                            carry: [carry.into(), SrcRef::False.into()],
+                        });
+                    } else {
+                        // No SSA value links these two ops: the low add sets
+                        // carry_out and the high add consumes it through
+                        // carry_in, so the carry is implicit.
+                        b.push_op(OpIAdd2 {
+                            dst: sum[0].into(),
+                            srcs: [x[0].into(), y[0].into()],
+                            carry_out: true,
+                            carry_in: false,
+                        });
+                        b.push_op(OpIAdd2 {
+                            dst: sum[1].into(),
+                            srcs: [x[1].into(), y[1].into()],
+                            carry_out: false,
+                            carry_in: true,
+                        });
+                    }
                     sum
                 } else {
                     assert!(alu.def.bit_size() == 32);
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 75b616a1a27..c79fdd4981e 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -2447,6 +2447,37 @@ impl DisplayOp for OpINeg {
 }
 impl_display_for_op!(OpINeg);
 
+/// Two-source integer add with boolean carry-in/carry-out flags.
+///
+/// Only used on SM50 (emitted for SMs below 70).
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpIAdd2 {
+    pub dst: Dst,
+
+    #[src_type(ALU)]
+    pub srcs: [Src; 2],
+
+    // TODO: We should probably track this as an SSA value somehow
+    pub carry_out: bool,
+    pub carry_in: bool,
+}
+
+impl DisplayOp for OpIAdd2 {
+    /// Prints `iadd`, then `.x` if `carry_in` and `.cc` if `carry_out`,
+    /// followed by both sources.
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "iadd")?;
+        if self.carry_in {
+            write!(f, ".x")?;
+        }
+        if self.carry_out {
+            write!(f, ".cc")?;
+        }
+        write!(f, " {} {}", self.srcs[0], self.srcs[1])
+    }
+}
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpIAdd3 {
@@ -4407,6 +4438,7 @@ pub enum Op {
     Flo(OpFlo),
     IAbs(OpIAbs),
     INeg(OpINeg),
+    IAdd2(OpIAdd2),
     IAdd3(OpIAdd3),
     IAdd3X(OpIAdd3X),
     IDp4(OpIDp4),
@@ -4842,6 +4874,7 @@ impl Instr {
             Op::Brev(_) | Op::Flo(_) | Op::PopC(_) => false,
             Op::IAbs(_)
             | Op::INeg(_)
+            | Op::IAdd2(_)
             | Op::IAdd3(_)
             | Op::IAdd3X(_)
             | Op::IDp4(_)
@@ -5414,13 +5447,28 @@ impl Shader {
     }
 
     pub fn lower_ineg(&mut self) {
-        self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
+        let sm = self.info.sm;
+        self.map_instrs(|mut instr: Box<Instr>, _| -> MappedInstrs {
             match instr.op {
-                Op::INeg(neg) => MappedInstrs::One(Instr::new_boxed(OpIAdd3 {
-                    dst: neg.dst,
-                    overflow: [Dst::None; 2],
-                    srcs: [Src::new_zero(), neg.src.ineg(), Src::new_zero()],
-                })),
+                Op::INeg(neg) => {
+                    // Same SM split as the iadd builder and the 64-bit iadd
+                    // path in from_nir: IAdd3 on SM70+, IAdd2 below that.
+                    if sm >= 70 {
+                        instr.op = Op::IAdd3(OpIAdd3 {
+                            dst: neg.dst,
+                            overflow: [Dst::None; 2],
+                            srcs: [0.into(), neg.src.ineg(), 0.into()],
+                        });
+                    } else {
+                        instr.op = Op::IAdd2(OpIAdd2 {
+                            dst: neg.dst,
+                            srcs: [0.into(), neg.src.ineg()],
+                            carry_in: false,
+                            carry_out: false,
+                        });
+                    }
+                    MappedInstrs::One(instr)
+                }
                 _ => MappedInstrs::One(instr),
             }
         })
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index 316072b7760..2c63ecb028f 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -122,7 +122,10 @@ fn legalize_sm50_instr(
         Op::Sel(op) => {
             copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
         }
-        Op::IAdd3(op) => {
+        // NOTE(review): encode_iadd2 asserts srcs[0].is_reg_or_zero(), yet
+        // only srcs[1] is legalized here; confirm srcs[0] is always a
+        // register or zero by this point.
+        Op::IAdd2(op) => {
             copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
         }
         Op::I2F(op) => {