Add a PRMT (byte permute) op to NAK: the IR definition, its SM75 encoding,
and the lowering of nir_op_pack_half_2x16_split onto two F2F ops plus a PRMT.

diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs
index 86b6b5de50a..56d35b9c193 100644
--- a/src/nouveau/compiler/nak_encode_sm75.rs
+++ b/src/nouveau/compiler/nak_encode_sm75.rs
@@ -1519,6 +1519,16 @@ impl SM75Instr {
         self.set_field(63..64, not_mod)
     }
 
+    fn encode_prmt(&mut self, op: &OpPrmt) {
+        self.encode_alu(
+            0x16,
+            Some(op.dst),
+            ALUSrc::from_src(&op.srcs[0]),
+            ALUSrc::Imm32(op.selection.inner()),
+            ALUSrc::from_src(&op.srcs[1]),
+        );
+    }
+
     pub fn encode(
         instr: &Instr,
         sm: u8,
@@ -1580,6 +1590,7 @@ impl SM75Instr {
             Op::PopC(op) => si.encode_popc(&op),
             Op::Brev(op) => si.encode_brev(&op),
             Op::BFind(op) => si.encode_bfind(&op),
+            Op::Prmt(op) => si.encode_prmt(&op),
             _ => panic!("Unhandled instruction"),
         }
 
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 1528530ad21..7444ec14b19 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -665,6 +665,57 @@ impl<'a> ShaderFromNir<'a> {
                 b.push_op(pcopy);
                 dst
             }
+            nir_op_pack_half_2x16_split => {
+                assert!(alu.get_src(0).bit_size() == 32);
+                let low = b.alloc_ssa(RegFile::GPR, 1);
+                let high = b.alloc_ssa(RegFile::GPR, 1);
+
+                b.push_op(OpF2F {
+                    dst: low.into(),
+                    src: srcs[0],
+                    src_type: FloatType::F32,
+                    dst_type: FloatType::F16,
+                    rnd_mode: FRndMode::NearestEven,
+                    ftz: false,
+                });
+
+                let src_bits = usize::from(alu.get_src(1).bit_size());
+                let src_type = FloatType::from_bits(src_bits);
+                assert!(matches!(src_type, FloatType::F32));
+                b.push_op(OpF2F {
+                    dst: high.into(),
+                    src: srcs[1],
+                    src_type: FloatType::F32,
+                    dst_type: FloatType::F16,
+                    rnd_mode: FRndMode::NearestEven,
+                    ftz: false,
+                });
+
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpPrmt {
+                    dst: dst.into(),
+                    srcs: [low.into(), high.into()],
+                    selection: PrmtSelectionEval::from([
+                        PrmtSelection {
+                            src: PrmtSrc::Byte5,
+                            sign_extend: false,
+                        },
+                        PrmtSelection {
+                            src: PrmtSrc::Byte4,
+                            sign_extend: false,
+                        },
+                        PrmtSelection {
+                            src: PrmtSrc::Byte1,
+                            sign_extend: false,
+                        },
+                        PrmtSelection {
+                            src: PrmtSrc::Byte0,
+                            sign_extend: false,
+                        },
+                    ]),
+                });
+                dst
+            }
             nir_op_u2f32 => {
                 assert!(alu.def.bit_size() == 32);
                 let dst = b.alloc_ssa(RegFile::GPR, 1);
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 81ea4dc568e..acc16f67f28 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -3187,6 +3187,130 @@ impl fmt::Display for OpBFind {
     }
 }
 
+#[derive(Copy, Clone, Debug)]
+pub enum PrmtSrc {
+    Byte0 = 0,
+    Byte1 = 1,
+    Byte2 = 2,
+    Byte3 = 3,
+    Byte4 = 4,
+    Byte5 = 5,
+    Byte6 = 6,
+    Byte7 = 7,
+}
+
+impl TryFrom<u32> for PrmtSrc {
+    type Error = String;
+
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        match value {
+            0 => Ok(Self::Byte0),
+            1 => Ok(Self::Byte1),
+            2 => Ok(Self::Byte2),
+            3 => Ok(Self::Byte3),
+            4 => Ok(Self::Byte4),
+            5 => Ok(Self::Byte5),
+            6 => Ok(Self::Byte6),
+            7 => Ok(Self::Byte7),
+            _ => Err(format!("Invalid value {}", value)),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct PrmtSelection {
+    pub src: PrmtSrc,
+    pub sign_extend: bool,
+}
+
+impl From<PrmtSelectionEval> for [PrmtSelection; 4] {
+    fn from(value: PrmtSelectionEval) -> Self {
+        let sel0 = value.0 & 0x7;
+        let sel1 = (value.0 & 0x70) >> 4;
+        let sel2 = (value.0 & 0x700) >> 8;
+        let sel3 = (value.0 & 0x7000) >> 12;
+
+        let sign0 = value.0 & 0x8;
+        let sign1 = value.0 & 0x80;
+        let sign2 = value.0 & 0x800;
+        let sign3 = value.0 & 0x8000;
+
+        [
+            PrmtSelection {
+                src: sel3.try_into().unwrap(),
+                sign_extend: sign3 != 0,
+            },
+            PrmtSelection {
+                src: sel2.try_into().unwrap(),
+                sign_extend: sign2 != 0,
+            },
+            PrmtSelection {
+                src: sel1.try_into().unwrap(),
+                sign_extend: sign1 != 0,
+            },
+            PrmtSelection {
+                src: sel0.try_into().unwrap(),
+                sign_extend: sign0 != 0,
+            },
+        ]
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct PrmtSelectionEval(u32);
+
+impl PrmtSelectionEval {
+    pub fn inner(&self) -> u32 {
+        self.0
+    }
+}
+
+impl From<[PrmtSelection; 4]> for PrmtSelectionEval {
+    fn from(selections: [PrmtSelection; 4]) -> Self {
+        let mut selection = 0;
+
+        for v in selections {
+            let src = if v.sign_extend {
+                v.src as u32 | 0x8
+            } else {
+                v.src as u32
+            };
+            selection = selection << 4 | src;
+        }
+
+        Self(selection)
+    }
+}
+
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+/// Permutes `srcs` into `dst` using `selection`.
+pub struct OpPrmt {
+    pub dst: Dst,
+
+    #[src_type(ALU)]
+    pub srcs: [Src; 2],
+
+    pub selection: PrmtSelectionEval,
+}
+
+impl fmt::Display for OpPrmt {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let sel: [PrmtSelection; 4] = self.selection.into();
+        write!(
+            f,
+            "PRMT {}, {} [{:?}, {:?}, {:?}, {:?}], {}",
+            self.dst,
+            self.srcs[0],
+            sel[0].src,
+            sel[1].src,
+            sel[2].src,
+            sel[3].src,
+            self.srcs[1],
+        )
+    }
+}
+
 #[derive(Display, DstsAsSlice, SrcsAsSlice, FromVariants)]
 pub enum Op {
     FAdd(OpFAdd),
@@ -3244,6 +3368,7 @@ pub enum Op {
     PopC(OpPopC),
     Brev(OpBrev),
     BFind(OpBFind),
+    Prmt(OpPrmt),
 }
 
 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
@@ -3560,7 +3685,7 @@ impl Instr {
             | Op::FSOut(_) => {
                 panic!("Not a hardware opcode")
             }
-            Op::PopC(_) | Op::Brev(_) | Op::BFind(_) => Some(15),
+            Op::PopC(_) | Op::Brev(_) | Op::BFind(_) | Op::Prmt(_) => Some(15),
         }
     }
 }
diff --git a/src/nouveau/compiler/nak_legalize.rs b/src/nouveau/compiler/nak_legalize.rs
index b44cd6f0373..d082c655705 100644
--- a/src/nouveau/compiler/nak_legalize.rs
+++ b/src/nouveau/compiler/nak_legalize.rs
@@ -189,7 +189,8 @@ impl<'a> LegalizeInstr<'a> {
             | Op::FRnd(_)
             | Op::PopC(_)
             | Op::Brev(_)
-            | Op::BFind(_) => (),
+            | Op::BFind(_)
+            | Op::Prmt(_) => (),
             Op::Sel(op) => {
                 let [ref mut src0, ref mut src1] = op.srcs;
                 if !src_is_reg(src0) && src_is_reg(src1) {