nak: Add some helpers for working with OpPrmt selectors

We had some helpers for this at one point, but the old ones were super
clunky and didn't really do what we wanted, so they were removed.
However, we have a lot of manual selector bit manipulation in
opt_copy_prop and we're about to add more.  These new helpers will make
it all a lot safer.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30230>
This commit is contained in:
Faith Ekstrand 2024-07-16 16:05:03 -05:00 committed by Marge Bot
parent 7e3106fa6f
commit b96d2d4351
2 changed files with 92 additions and 51 deletions

View file

@ -903,6 +903,12 @@ impl From<f32> for SrcRef {
}
}
impl From<PrmtSel> for SrcRef {
    fn from(sel: PrmtSel) -> SrcRef {
        // Widen the 16-bit selector losslessly to u32, then hand it to the
        // u32 -> SrcRef conversion.
        let raw: u32 = sel.0.into();
        raw.into()
    }
}
impl From<CBufRef> for SrcRef {
fn from(cb: CBufRef) -> SrcRef {
SrcRef::CBuf(cb)
@ -3659,6 +3665,41 @@ impl DisplayOp for OpMov {
}
impl_display_for_op!(OpMov);
/// One 4-bit entry of a permute selector, kept in the low nibble of a `u8`.
///
/// Bits 0-1 choose a byte within a source, bit 2 chooses which of two
/// sources to read, and bit 3 asks for the chosen byte's top bit to be
/// replicated across the whole output byte.
#[derive(Clone, Copy)]
pub struct PrmtSelByte(u8);

impl PrmtSelByte {
    /// Index (0 or 1) of the source this entry reads from (bit 2).
    pub fn src(&self) -> usize {
        usize::from((self.0 >> 2) & 0x1)
    }

    /// Index (0 through 3) of the byte taken from the chosen source
    /// (bits 0-1).
    pub fn byte(&self) -> usize {
        usize::from(self.0 & 0x3)
    }

    /// Whether bit 3 is set, i.e. the output is the replicated most
    /// significant bit of the chosen byte rather than the byte itself.
    pub fn msb(&self) -> bool {
        (self.0 & 0x8) != 0
    }

    /// Evaluates this entry against a known 32-bit source value `u`.
    ///
    /// Returns byte `self.byte()` of `u` (byte 0 being the least
    /// significant), or, when `msb()` is set, `0xff` / `0x00` depending on
    /// whether that byte's top bit is set.
    pub fn fold_u32(&self, u: u32) -> u8 {
        // Little-endian byte order puts bits [8*i, 8*i + 8) at index i.
        let picked = u.to_le_bytes()[self.byte()];
        if !self.msb() {
            return picked;
        }

        // Same result as an arithmetic right shift by 7 on the signed byte.
        if picked & 0x80 != 0 {
            0xff
        } else {
            0x00
        }
    }
}
/// A 16-bit permute selector: four 4-bit entries, one per output byte.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct PrmtSel(pub u16);

impl PrmtSel {
    /// Returns the selector entry controlling output byte `byte_idx`.
    ///
    /// Entry 0 lives in the lowest nibble.
    ///
    /// # Panics
    ///
    /// Panics if `byte_idx` is 4 or greater.
    pub fn get(&self, byte_idx: usize) -> PrmtSelByte {
        assert!(byte_idx < 4);

        let shift = byte_idx * 4;
        let nibble = (self.0 >> shift) & 0xf;
        PrmtSelByte(nibble as u8)
    }
}
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PrmtMode {
@ -3700,6 +3741,41 @@ pub struct OpPrmt {
pub mode: PrmtMode,
}
impl OpPrmt {
    /// Returns the permute selector when it is statically known.
    ///
    /// Yields `None` unless the op is in `PrmtMode::Index` mode and
    /// `self.sel.as_u32()` produces a value.
    pub fn get_sel(&self) -> Option<PrmtSel> {
        // TODO: We could construct a PrmtSel for the other modes but we don't
        // use them right now because they're kinda pointless.
        if self.mode != PrmtMode::Index {
            return None;
        }

        // The top 16 bits are ignored, so the truncating cast is intentional.
        self.sel.as_u32().map(|sel| PrmtSel(sel as u16))
    }

    /// Folds the whole permute to a 32-bit value when that is possible.
    ///
    /// Returns `None` if `get_sel()` returns `None`, or if any source picked
    /// by one of the four selector entries has no `as_u32()` value.
    pub fn as_u32(&self) -> Option<u32> {
        let sel = self.get_sel()?;

        let mut imm = 0_u32;
        for b in 0..4 {
            let sel_byte = sel.get(b);
            let src_u32 = self.srcs[sel_byte.src()].as_u32()?;

            // Place the folded byte at output byte position `b`.
            let sb = sel_byte.fold_u32(src_u32);
            imm |= u32::from(sb) << (b * 8);
        }
        Some(imm)
    }
}
impl DisplayOp for OpPrmt {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(

View file

@ -37,8 +37,8 @@ struct CopyEntry {
struct PrmtEntry {
bi: usize,
sel: PrmtSel,
srcs: [Src; 2],
selection: u16,
}
enum CopyPropEntry {
@ -73,21 +73,15 @@ impl CopyPropPass {
&mut self,
bi: usize,
dst: SSAValue,
sel: PrmtSel,
srcs: [Src; 2],
selection: u16,
) {
assert!(
srcs[0].src_ref.get_reg().is_none()
&& srcs[1].src_ref.get_reg().is_none()
);
self.ssa_map.insert(
dst,
CopyPropEntry::Prmt(PrmtEntry {
bi,
srcs,
selection,
}),
);
self.ssa_map
.insert(dst, CopyPropEntry::Prmt(PrmtEntry { bi, sel, srcs }));
}
fn add_fp64_copy(&mut self, bi: usize, dst: &SSARef, src: Src) {
@ -277,25 +271,22 @@ impl CopyPropPass {
let mut combined = [0_u8; 4];
for i in 0..4 {
let val = ((entry.selection >> (swizzle_prmt[i] * 4))
& 0xF) as u8;
let prmt_byte = entry.sel.get(swizzle_prmt[i].into());
// If we have a sign extension, we cannot simplify it.
if val & 8 != 0 {
if prmt_byte.msb() {
return;
}
let target_src_idx = val / 4;
// Ensure we are using the same source, we cannot
// combine multiple sources.
if entry_src_idx.is_none() {
entry_src_idx = Some(target_src_idx);
} else if entry_src_idx != Some(target_src_idx) {
entry_src_idx = Some(prmt_byte.src());
} else if entry_src_idx != Some(prmt_byte.src()) {
return;
}
combined[i] = val & 0x3;
combined[i] = prmt_byte.byte().try_into().unwrap();
}
let entry_src_idx = usize::from(entry_src_idx.unwrap());
@ -597,41 +588,15 @@ impl CopyPropPass {
Op::Prmt(prmt) => {
let dst = prmt.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
if prmt.mode != PrmtMode::Index {
return;
}
let Some(sel) = prmt.sel.as_u32() else {
return;
};
// The top 16 bits are ignored
let sel = sel as u16;
if sel == 0x3210 {
self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]);
} else if sel == 0x7654 {
self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]);
} else {
let mut is_imm = true;
let mut imm = 0_u32;
for d in 0..4 {
let s = ((sel >> d * 4) & 0x7) as usize;
let sign = (sel >> d * 4) & 0x8 != 0;
if let Some(u) = prmt.srcs[s / 4].as_u32() {
let mut sb = (u >> (s * 8)) as u8;
if sign {
sb = ((sb as i8) >> 7) as u8;
}
imm |= (sb as u32) << (d * 8);
} else {
is_imm = false;
break;
}
}
if is_imm {
if let Some(sel) = prmt.get_sel() {
if let Some(imm) = prmt.as_u32() {
self.add_copy(bi, dst[0], SrcType::GPR, imm.into());
} else if sel == PrmtSel(0x3210) {
self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]);
} else if sel == PrmtSel(0x7654) {
self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]);
} else {
self.add_prmt(bi, dst[0], prmt.srcs, sel);
self.add_prmt(bi, dst[0], sel, prmt.srcs);
}
}
}