diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index d2df2c60af5..1d823773849 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -903,6 +903,12 @@ impl From for SrcRef {
     }
 }
 
+impl From<PrmtSel> for SrcRef {
+    fn from(sel: PrmtSel) -> SrcRef {
+        u32::from(sel.0).into()
+    }
+}
+
 impl From<CBufRef> for SrcRef {
     fn from(cb: CBufRef) -> SrcRef {
         SrcRef::CBuf(cb)
@@ -3659,6 +3665,41 @@ impl DisplayOp for OpMov {
 }
 impl_display_for_op!(OpMov);
 
+#[derive(Copy, Clone)]
+pub struct PrmtSelByte(u8);
+
+impl PrmtSelByte {
+    pub fn src(&self) -> usize {
+        ((self.0 >> 2) & 0x1).into()
+    }
+
+    pub fn byte(&self) -> usize {
+        (self.0 & 0x3).into()
+    }
+
+    pub fn msb(&self) -> bool {
+        (self.0 & 0x8) != 0
+    }
+
+    pub fn fold_u32(&self, u: u32) -> u8 {
+        let mut sb = (u >> (self.byte() * 8)) as u8;
+        if self.msb() {
+            sb = ((sb as i8) >> 7) as u8;
+        }
+        sb
+    }
+}
+
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub struct PrmtSel(pub u16);
+
+impl PrmtSel {
+    pub fn get(&self, byte_idx: usize) -> PrmtSelByte {
+        assert!(byte_idx < 4);
+        PrmtSelByte(((self.0 >> (byte_idx * 4)) & 0xf) as u8)
+    }
+}
+
 #[allow(dead_code)]
 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
 pub enum PrmtMode {
@@ -3700,6 +3741,41 @@ pub struct OpPrmt {
     pub mode: PrmtMode,
 }
 
+impl OpPrmt {
+    pub fn get_sel(&self) -> Option<PrmtSel> {
+        // TODO: We could construct a PrmtSel for the other modes but we don't
+        // use them right now because they're kinda pointless.
+        if self.mode != PrmtMode::Index {
+            return None;
+        }
+
+        if let Some(sel) = self.sel.as_u32() {
+            // The top 16 bits are ignored
+            Some(PrmtSel(sel as u16))
+        } else {
+            None
+        }
+    }
+
+    pub fn as_u32(&self) -> Option<u32> {
+        let Some(sel) = self.get_sel() else {
+            return None;
+        };
+
+        let mut imm = 0_u32;
+        for b in 0..4 {
+            let sel_byte = sel.get(b);
+            let Some(src_u32) = self.srcs[sel_byte.src()].as_u32() else {
+                return None;
+            };
+
+            let sb = sel_byte.fold_u32(src_u32);
+            imm |= u32::from(sb) << (b * 8);
+        }
+        Some(imm)
+    }
+}
+
 impl DisplayOp for OpPrmt {
     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(
diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs
index e9d9292a874..aaae7bd8581 100644
--- a/src/nouveau/compiler/nak/opt_copy_prop.rs
+++ b/src/nouveau/compiler/nak/opt_copy_prop.rs
@@ -37,8 +37,8 @@ struct CopyEntry {
 
 struct PrmtEntry {
     bi: usize,
+    sel: PrmtSel,
     srcs: [Src; 2],
-    selection: u16,
 }
 
 enum CopyPropEntry {
@@ -73,21 +73,15 @@ impl CopyPropPass {
         &mut self,
         bi: usize,
         dst: SSAValue,
+        sel: PrmtSel,
         srcs: [Src; 2],
-        selection: u16,
     ) {
         assert!(
            srcs[0].src_ref.get_reg().is_none()
                && srcs[1].src_ref.get_reg().is_none()
         );
-        self.ssa_map.insert(
-            dst,
-            CopyPropEntry::Prmt(PrmtEntry {
-                bi,
-                srcs,
-                selection,
-            }),
-        );
+        self.ssa_map
+            .insert(dst, CopyPropEntry::Prmt(PrmtEntry { bi, sel, srcs }));
     }
 
     fn add_fp64_copy(&mut self, bi: usize, dst: &SSARef, src: Src) {
@@ -277,25 +271,22 @@ impl CopyPropPass {
 
                 let mut combined = [0_u8; 4];
                 for i in 0..4 {
-                    let val = ((entry.selection >> (swizzle_prmt[i] * 4))
-                        & 0xF) as u8;
+                    let prmt_byte = entry.sel.get(swizzle_prmt[i].into());
 
                     // If we have a sign extension, we cannot simplify it.
-                    if val & 8 != 0 {
+                    if prmt_byte.msb() {
                         return;
                     }
 
-                    let target_src_idx = val / 4;
-
                     // Ensure we are using the same source, we cannot
                     // combine multiple sources.
                     if entry_src_idx.is_none() {
-                        entry_src_idx = Some(target_src_idx);
-                    } else if entry_src_idx != Some(target_src_idx) {
+                        entry_src_idx = Some(prmt_byte.src());
+                    } else if entry_src_idx != Some(prmt_byte.src()) {
                         return;
                     }
 
-                    combined[i] = val & 0x3;
+                    combined[i] = prmt_byte.byte().try_into().unwrap();
                 }
 
                 let entry_src_idx = usize::from(entry_src_idx.unwrap());
@@ -597,41 +588,15 @@ impl CopyPropPass {
             Op::Prmt(prmt) => {
                 let dst = prmt.dst.as_ssa().unwrap();
                 assert!(dst.comps() == 1);
-                if prmt.mode != PrmtMode::Index {
-                    return;
-                }
-                let Some(sel) = prmt.sel.as_u32() else {
-                    return;
-                };
-
-                // The top 16 bits are ignored
-                let sel = sel as u16;
-
-                if sel == 0x3210 {
-                    self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]);
-                } else if sel == 0x7654 {
-                    self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]);
-                } else {
-                    let mut is_imm = true;
-                    let mut imm = 0_u32;
-                    for d in 0..4 {
-                        let s = ((sel >> d * 4) & 0x7) as usize;
-                        let sign = (sel >> d * 4) & 0x8 != 0;
-                        if let Some(u) = prmt.srcs[s / 4].as_u32() {
-                            let mut sb = (u >> (s * 8)) as u8;
-                            if sign {
-                                sb = ((sb as i8) >> 7) as u8;
-                            }
-                            imm |= (sb as u32) << (d * 8);
-                        } else {
-                            is_imm = false;
-                            break;
-                        }
-                    }
-                    if is_imm {
+                if let Some(sel) = prmt.get_sel() {
+                    if let Some(imm) = prmt.as_u32() {
                         self.add_copy(bi, dst[0], SrcType::GPR, imm.into());
+                    } else if sel == PrmtSel(0x3210) {
+                        self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]);
+                    } else if sel == PrmtSel(0x7654) {
+                        self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]);
                     } else {
-                        self.add_prmt(bi, dst[0], prmt.srcs, sel);
+                        self.add_prmt(bi, dst[0], sel, prmt.srcs);
                     }
                 }
             }
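
Note (not part of the patch): in index mode the 16-bit PRMT selector packs four 4-bit entries, one per destination byte. Bits 1:0 of each entry pick a byte within a source, bit 2 picks which of the two sources, and bit 3 replaces the byte with its sign bit replicated. The standalone sketch below mirrors the folding done by PrmtSelByte::fold_u32() and OpPrmt::as_u32() above; fold_sel() is a hypothetical helper written only for this note and does not exist in NAK.

// Standalone sketch: fold a PRMT index-mode selector over two known
// 32-bit sources, one destination byte per selector nibble.
fn fold_sel(sel: u16, srcs: [u32; 2]) -> u32 {
    let mut imm = 0_u32;
    for b in 0..4 {
        // Nibble b controls destination byte b (low nibble first).
        let nib = (sel >> (b * 4)) & 0xf;
        let src = usize::from((nib >> 2) & 0x1); // bit 2: source index
        let byte = usize::from(nib & 0x3); // bits 1:0: byte within source
        let mut sb = (srcs[src] >> (byte * 8)) as u8;
        if nib & 0x8 != 0 {
            // Bit 3: replicate the selected byte's sign bit.
            sb = ((sb as i8) >> 7) as u8;
        }
        imm |= u32::from(sb) << (b * 8);
    }
    imm
}

fn main() {
    let srcs = [0x8370_2a41, 0xdead_beef];
    // 0x3210 and 0x7654 are the identity selectors that the pass turns
    // into plain copies of srcs[0] and srcs[1].
    assert_eq!(fold_sel(0x3210, srcs), srcs[0]);
    assert_eq!(fold_sel(0x7654, srcs), srcs[1]);
    // 0x9910 sign-extends the low 16 bits of srcs[0] (0x2a41 is positive).
    assert_eq!(fold_sel(0x9910, srcs), 0x0000_2a41);
    // 0xbbb3 sign-extends byte 3 of srcs[0] (0x83 has its sign bit set).
    assert_eq!(fold_sel(0xbbb3, srcs), 0xffff_ff83);
}

This is why the copy-prop pass can prefer a full constant fold (as_u32) and only then fall back to recognizing the identity selectors as register copies.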
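The swizzle handling in opt_copy_prop.rs rests on the same idea: a 16-bit swizzle of a PRMT result is itself a permute, so it can be composed with the producer's selector. A rough sketch of that composition follows; compose() is a hypothetical helper for illustration and, unlike the pass, it skips the checks that reject sign-extension and mixed sources.

// Hypothetical sketch: compose a destination byte swizzle with an
// existing PRMT index-mode selector. swizzle[i] names which selector
// nibble feeds destination byte i of the swizzled value.
fn compose(sel: u16, swizzle: [usize; 4]) -> u16 {
    let mut out = 0_u16;
    for i in 0..4 {
        let nib = (sel >> (swizzle[i] * 4)) & 0xf;
        out |= nib << (i * 4);
    }
    out
}

fn main() {
    // An Xx swizzle replicates the low 16 bits, i.e. it reads selector
    // nibbles [0, 1, 0, 1]; applied to the identity selector 0x3210 it
    // yields 0x1010, still a permute of srcs[0] alone.
    assert_eq!(compose(0x3210, [0, 1, 0, 1]), 0x1010);
    // A Yy swizzle reads nibbles [2, 3, 2, 3].
    assert_eq!(compose(0x7654, [2, 3, 2, 3]), 0x7676);
}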