diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index 0f377799246..3eec07369a8 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -121,6 +121,65 @@ pub trait Builder {
 pub trait SSABuilder: Builder {
     fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef;
 
+    /// Emits a 32-bit left shift of `x` by `shift` and returns the result.
+    ///
+    /// Pushes `OpShf` when `self.sm() >= 70` and `OpShl` otherwise.  `wrap`
+    /// is set, as it was in the open-coded `OpShf` this helper replaces.
+    fn shl(&mut self, x: Src, shift: Src) -> SSARef {
+        let dst = self.alloc_ssa(RegFile::GPR, 1);
+        if self.sm() >= 70 {
+            self.push_op(OpShf {
+                dst: dst.into(),
+                low: x,
+                high: 0.into(),
+                shift,
+                right: false,
+                wrap: true,
+                data_type: IntType::I32,
+                dst_high: false,
+            });
+        } else {
+            self.push_op(OpShl {
+                dst: dst.into(),
+                src: x,
+                shift,
+                wrap: true,
+            });
+        }
+        dst
+    }
+
+    /// Emits a 32-bit right shift of `x` by `shift` and returns the result.
+    ///
+    /// `signed` selects `IntType::I32` over `IntType::U32` for `OpShf` and is
+    /// forwarded to `OpShr`.  Pushes `OpShf` when `self.sm() >= 70` and
+    /// `OpShr` otherwise.  `wrap` is set, as it was in the open-coded `OpShf`
+    /// this helper replaces.
+    fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
+        let dst = self.alloc_ssa(RegFile::GPR, 1);
+        if self.sm() >= 70 {
+            self.push_op(OpShf {
+                dst: dst.into(),
+                low: 0.into(),
+                high: x,
+                shift,
+                right: true,
+                wrap: true,
+                data_type: if signed { IntType::I32 } else { IntType::U32 },
+                dst_high: true,
+            });
+        } else {
+            self.push_op(OpShr {
+                dst: dst.into(),
+                src: x,
+                shift,
+                wrap: true,
+                signed,
+            });
+        }
+        dst
+    }
+
     fn fadd(&mut self, x: Src, y: Src) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
         self.push_op(OpFAdd {
diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 48b28361350..5d06ce3fd3f 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -544,6 +544,55 @@ impl SM50Instr {
         self.set_bit(50, op.wrap);
     }
 
+    /// Encodes `OpShl`.  The opcode depends on whether `op.shift` is a
+    /// register (or zero), an immediate or a constant buffer reference.
+    fn encode_shl(&mut self, op: &OpShl) {
+        self.set_dst(op.dst);
+        self.set_reg_src(8..16, op.src);
+        match op.shift.src_ref {
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c48);
+                self.set_reg_src(20..28, op.shift);
+            }
+            SrcRef::Imm32(i) => {
+                self.set_opcode(0x3848);
+                self.set_src_imm_i20(20..39, 56, i);
+            }
+            SrcRef::CBuf(cb) => {
+                self.set_opcode(0x4c48);
+                self.set_src_cb(20..39, &cb);
+            }
+            src1 => panic!("unsupported src1 type for SHL: {src1}"),
+        }
+
+        self.set_bit(39, op.wrap);
+    }
+
+    /// Encodes `OpShr`.  The opcode depends on whether `op.shift` is a
+    /// register (or zero), an immediate or a constant buffer reference.
+    fn encode_shr(&mut self, op: &OpShr) {
+        self.set_dst(op.dst);
+        self.set_reg_src(8..16, op.src);
+        match op.shift.src_ref {
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c28);
+                self.set_reg_src(20..28, op.shift);
+            }
+            SrcRef::Imm32(i) => {
+                self.set_opcode(0x3828);
+                self.set_src_imm_i20(20..39, 56, i);
+            }
+            SrcRef::CBuf(cb) => {
+                self.set_opcode(0x4c28);
+                self.set_src_cb(20..39, &cb);
+            }
+            src1 => panic!("unsupported src1 type for SHR: {src1}"),
+        }
+
+        self.set_bit(39, op.wrap);
+        self.set_bit(48, op.signed);
+    }
+
     fn encode_i2f(&mut self, op: &OpI2F) {
         let abs_bit = 49;
         let neg_bit = 45;
@@ -1604,6 +1653,8 @@ impl SM50Instr {
             Op::St(op) => si.encode_st(&op),
             Op::Lop2(op) => si.encode_lop2(&op),
             Op::Shf(op) => si.encode_shf(&op),
+            Op::Shl(op) => si.encode_shl(&op),
+            Op::Shr(op) => si.encode_shr(&op),
             Op::F2F(op) => si.encode_f2f(&op),
             Op::F2I(op) => si.encode_f2i(&op),
             Op::I2F(op) => si.encode_i2f(&op),
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index baad78bb07c..31fbce44e90 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -899,18 +899,7 @@ impl<'a> ShaderFromNir<'a> {
                     dst
                 } else {
                     assert!(alu.def.bit_size() == 32);
-                    let dst = b.alloc_ssa(RegFile::GPR, 1);
-                    b.push_op(OpShf {
-                        dst: dst.into(),
-                        low: x.into(),
-                        high: 0.into(),
-                        shift: shift,
-                        right: false,
-                        wrap: true,
-                        data_type: IntType::U32,
-                        dst_high: false,
-                    });
-                    dst
+                    b.shl(srcs[0], srcs[1])
                 }
             }
             nir_op_ishr => {
@@ -944,18 +933,7 @@ impl<'a> ShaderFromNir<'a> {
                     dst
                 } else {
                     assert!(alu.def.bit_size() == 32);
-                    let dst = b.alloc_ssa(RegFile::GPR, 1);
-                    b.push_op(OpShf {
-                        dst: dst.into(),
-                        low: 0.into(),
-                        high: x.into(),
-                        shift: shift,
-                        right: true,
-                        wrap: true,
-                        data_type: IntType::I32,
-                        dst_high: true,
-                    });
-                    dst
+                    b.shr(srcs[0], srcs[1], true)
                 }
             }
             nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]),
@@ -1153,18 +1131,7 @@ impl<'a> ShaderFromNir<'a> {
                     dst
                 } else {
                     assert!(alu.def.bit_size() == 32);
-                    let dst = b.alloc_ssa(RegFile::GPR, 1);
-                    b.push_op(OpShf {
-                        dst: dst.into(),
-                        low: x.into(),
-                        high: 0.into(),
-                        shift: shift,
-                        right: true,
-                        wrap: true,
-                        data_type: IntType::U32,
-                        dst_high: false,
-                    });
-                    dst
+                    b.shr(srcs[0], srcs[1], false)
                 }
             }
             nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 661dc5ea0ff..3103cb0b2d4 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -2816,6 +2816,62 @@ impl DisplayOp for OpShf {
 }
 impl_display_for_op!(OpShf);
 
+/// Only used on SM50
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpShl {
+    pub dst: Dst,
+
+    #[src_type(GPR)]
+    pub src: Src,
+
+    #[src_type(ALU)]
+    pub shift: Src,
+
+    pub wrap: bool,
+}
+
+impl DisplayOp for OpShl {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "shl")?;
+        if self.wrap {
+            write!(f, ".w")?;
+        }
+        write!(f, " {} {}", self.src, self.shift)
+    }
+}
+impl_display_for_op!(OpShl);
+
+/// Only used on SM50
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpShr {
+    pub dst: Dst,
+
+    #[src_type(GPR)]
+    pub src: Src,
+
+    #[src_type(ALU)]
+    pub shift: Src,
+
+    pub wrap: bool,
+    pub signed: bool,
+}
+
+impl DisplayOp for OpShr {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "shr")?;
+        if self.wrap {
+            write!(f, ".w")?;
+        }
+        if !self.signed {
+            write!(f, ".u32")?;
+        }
+        write!(f, " {} {}", self.src, self.shift)
+    }
+}
+impl_display_for_op!(OpShr);
+
 #[repr(C)]
 #[derive(DstsAsSlice)]
 pub struct OpF2F {
@@ -4511,6 +4567,8 @@ pub enum Op {
     Lop3(OpLop3),
     PopC(OpPopC),
     Shf(OpShf),
+    Shl(OpShl),
+    Shr(OpShr),
     F2F(OpF2F),
     F2I(OpF2I),
     I2F(OpI2F),
@@ -4945,7 +5003,9 @@ impl Instr {
             | Op::ISetP(_)
             | Op::Lop2(_)
             | Op::Lop3(_)
-            | Op::Shf(_) => true,
+            | Op::Shf(_)
+            | Op::Shl(_)
+            | Op::Shr(_) => true,
 
             // Conversions are variable latency?!?
             Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::FRnd(_) => false,
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index d6143538b15..c9a929b625e 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -72,6 +72,19 @@ fn swap_srcs_if_not_reg(x: &mut Src, y: &mut Src) -> bool {
     }
 }
 
+/// Calls `copy_src` on `src` when `src.as_imm_not_i20()` returns `Some`,
+/// which presumably means an immediate that does not fit the 20-bit
+/// immediate field used by the SM50 encoder.
+fn copy_src_if_i20_overflow(
+    b: &mut impl SSABuilder,
+    src: &mut Src,
+    file: RegFile,
+) {
+    if src.as_imm_not_i20().is_some() {
+        copy_src(b, src, file);
+    }
+}
+
 fn legalize_sm50_instr(
     b: &mut impl SSABuilder,
     _bl: &impl BlockLiveness,
@@ -83,6 +96,14 @@ fn legalize_sm50_instr(
             copy_src_if_not_reg(b, &mut op.shift, RegFile::GPR);
             copy_src_if_not_reg(b, &mut op.high, RegFile::GPR);
         }
+        Op::Shl(op) => {
+            copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
+            copy_src_if_i20_overflow(b, &mut op.shift, RegFile::GPR);
+        }
+        Op::Shr(op) => {
+            copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
+            copy_src_if_i20_overflow(b, &mut op.shift, RegFile::GPR);
+        }
         Op::FAdd(op) => {
             let [ref mut src0, ref mut src1] = op.srcs;
             swap_srcs_if_not_reg(src0, src1);