diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index 3eec07369a8..8f5b9f1fca3 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -300,11 +300,55 @@ pub trait SSABuilder: Builder {
 
+    /// Multiplies `x` by `y` and returns the single-GPR SSA result.
+    ///
+    /// SM70 and newer emit `OpIMad` with a zero addend; older targets
+    /// emit `OpIMul`, which is only used on SM50.
     fn imul(&mut self, x: Src, y: Src) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
-        self.push_op(OpIMad {
-            dst: dst.into(),
-            srcs: [x, y, 0.into()],
-            signed: false,
-        });
+        if self.sm() >= 70 {
+            self.push_op(OpIMad {
+                dst: dst.into(),
+                srcs: [x, y, 0.into()],
+                signed: false,
+            });
+        } else {
+            self.push_op(OpIMul {
+                dst: dst[0].into(),
+                srcs: [x, y],
+                signed: [false; 2],
+                high: false,
+            });
+        }
+        dst
+    }
+
+    /// Multiplies the 32-bit sources `x` and `y` into a 64-bit product,
+    /// returned as two GPRs: `dst[0]` is the low half, `dst[1]` the high.
+    ///
+    /// `signed` selects signed or unsigned multiplication for both
+    /// sources.  SM70 and newer emit a single `OpIMad64` with a zero
+    /// addend; older targets emit one `OpIMul` per half.
+    fn imul_2x32_64(&mut self, x: Src, y: Src, signed: bool) -> SSARef {
+        let dst = self.alloc_ssa(RegFile::GPR, 2);
+        if self.sm() >= 70 {
+            self.push_op(OpIMad64 {
+                dst: dst.into(),
+                srcs: [x, y, 0.into()],
+                signed,
+            });
+        } else {
+            self.push_op(OpIMul {
+                dst: dst[0].into(),
+                srcs: [x, y],
+                signed: [signed; 2],
+                high: false,
+            });
+            self.push_op(OpIMul {
+                dst: dst[1].into(),
+                srcs: [x, y],
+                signed: [signed; 2],
+                high: true,
+            });
+        }
         dst
     }
 
diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 5d06ce3fd3f..f8dac3b9c18 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -712,6 +712,53 @@ impl SM50Instr {
         self.set_dst(op.dst);
     }
 
+    /// Encodes an `OpIMul` for SM50.
+    ///
+    /// `srcs[0]` is encoded as a register.  If `as_imm_not_i20()` yields
+    /// a value for `srcs[1]`, the 32-bit-immediate form (0x1fc0) is used;
+    /// otherwise the register, 20-bit-immediate or constant-buffer form
+    /// is selected from the source kind.  The `high`/`signed` flags sit
+    /// at different bit positions in the two layouts.
+    ///
+    /// Panics if a source carries a modifier or if `srcs[1]` is of an
+    /// unsupported kind.
+    fn encode_imul(&mut self, op: &OpIMul) {
+        assert!(op.srcs[0].src_mod.is_none());
+        assert!(op.srcs[1].src_mod.is_none());
+
+        self.set_dst(op.dst);
+        self.set_reg_src(8..16, op.srcs[0]);
+
+        if let Some(i) = op.srcs[1].as_imm_not_i20() {
+            self.set_opcode(0x1fc0);
+            self.set_src_imm32(20..52, i);
+
+            self.set_bit(53, op.high);
+            self.set_bit(54, op.signed[0]);
+            self.set_bit(55, op.signed[1]);
+        } else {
+            match op.srcs[1].src_ref {
+                SrcRef::Zero | SrcRef::Reg(_) => {
+                    self.set_opcode(0x5c38);
+                    self.set_reg_src(20..28, op.srcs[1]);
+                }
+                SrcRef::Imm32(i) => {
+                    self.set_opcode(0x3838);
+                    self.set_src_imm_i20(20..39, 56, i);
+                }
+                SrcRef::CBuf(cb) => {
+                    self.set_opcode(0x4c38);
+                    self.set_src_cb(20..39, &cb);
+                }
+                src1 => panic!("unsupported src1 type for IMUL: {src1}"),
+            };
+
+            self.set_bit(39, op.high);
+            self.set_bit(40, op.signed[0]);
+            self.set_bit(41, op.signed[1]);
+        }
+    }
+
     fn encode_f2i(&mut self, op: &OpF2I) {
         match &op.src.src_ref {
             SrcRef::Zero | SrcRef::Reg(_) => {
@@ -1656,6 +1703,7 @@ impl SM50Instr {
             Op::I2F(op) => si.encode_i2f(&op),
             Op::FRnd(op) => si.encode_frnd(&op),
             Op::IMad(op) => si.encode_imad(&op),
+            Op::IMul(op) => si.encode_imul(&op),
             Op::IMnMx(op) => si.encode_imnmx(&op),
             Op::ISetP(op) => si.encode_isetp(&op),
             Op::Tex(op) => si.encode_tex(&op),
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 31fbce44e90..069760569cd 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -810,22 +810,13 @@ impl<'a> ShaderFromNir<'a> {
                 b.imul(srcs[0], srcs[1])
             }
             nir_op_imul_2x32_64 | nir_op_umul_2x32_64 => {
-                let dst = b.alloc_ssa(RegFile::GPR, 2);
-                b.push_op(OpIMad64 {
-                    dst: dst.into(),
-                    srcs: [srcs[0], srcs[1], 0.into()],
-                    signed: alu.op == nir_op_imul_2x32_64,
-                });
-                dst
+                let signed = alu.op == nir_op_imul_2x32_64;
+                b.imul_2x32_64(srcs[0], srcs[1], signed)
             }
             nir_op_imul_high | nir_op_umul_high => {
-                let dst = b.alloc_ssa(RegFile::GPR, 2);
-                b.push_op(OpIMad64 {
-                    dst: dst.into(),
-                    srcs: [srcs[0], srcs[1], 0.into()],
-                    signed: alu.op == nir_op_imul_high,
-                });
-                dst[1].into()
+                let signed = alu.op == nir_op_imul_high;
+                let dst64 = b.imul_2x32_64(srcs[0], srcs[1], signed);
+                dst64[1].into()
             }
             nir_op_ine => {
                 if alu.get_src(0).bit_size() == 1 {
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 3103cb0b2d4..a8445313495 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -2630,6 +2630,40 @@ impl DisplayOp for OpIMad {
 }
 impl_display_for_op!(OpIMad);
 
+/// Integer multiply.  Only used on SM50.
+///
+/// `signed[i]` gives the signedness of `srcs[i]`; `high` selects the
+/// high half of the product instead of the low half.
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpIMul {
+    pub dst: Dst,
+
+    #[src_type(ALU)]
+    pub srcs: [Src; 2],
+
+    pub signed: [bool; 2],
+    pub high: bool,
+}
+
+impl DisplayOp for OpIMul {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "imul")?;
+        if self.high {
+            write!(f, ".hi")?;
+        }
+        let src_type = |signed| if signed { ".s32" } else { ".u32" };
+        write!(
+            f,
+            "{}{}",
+            src_type(self.signed[0]),
+            src_type(self.signed[1])
+        )?;
+        write!(f, " {} {}", self.srcs[0], self.srcs[1])
+    }
+}
+impl_display_for_op!(OpIMul);
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpIMad64 {
@@ -4559,6 +4593,7 @@ pub enum Op {
     IDp4(OpIDp4),
     IMad(OpIMad),
     IMad64(OpIMad64),
+    IMul(OpIMul),
     IMnMx(OpIMnMx),
     ISetP(OpISetP),
     Lop2(OpLop2),
@@ -4997,6 +5032,7 @@ impl Instr {
             | Op::IDp4(_)
             | Op::IMad(_)
             | Op::IMad64(_)
+            | Op::IMul(_)
             | Op::IMnMx(_)
             | Op::ISetP(_)
             | Op::Lop2(_)
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index c9a929b625e..40e6aad1227 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -154,6 +154,15 @@ fn legalize_sm50_instr(
             copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
             copy_src_if_not_reg(b, &mut op.srcs[2], RegFile::GPR);
         }
+        Op::IMul(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            // The signedness flags are per-source, so they must follow
+            // the sources when those are swapped.
+            if swap_srcs_if_not_reg(src0, src1) {
+                op.signed.swap(0, 1);
+            }
+            copy_src_if_not_reg(b, src0, RegFile::GPR);
+        }
         Op::F2I(op) => {
             copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
         }