nak: add support for packhalf2x16_split

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Daniel Almeida 2023-07-24 17:38:15 -03:00 committed by Marge Bot
parent 1f10cdbbbe
commit 59c05e16e4
4 changed files with 190 additions and 2 deletions

View file

@ -1519,6 +1519,16 @@ impl SM75Instr {
self.set_field(63..64, not_mod)
}
fn encode_prmt(&mut self, op: &OpPrmt) {
self.encode_alu(
0x16,
Some(op.dst),
ALUSrc::from_src(&op.srcs[0]),
ALUSrc::Imm32(op.selection.inner()),
ALUSrc::from_src(&op.srcs[1]),
);
}
pub fn encode(
instr: &Instr,
sm: u8,
@ -1580,6 +1590,7 @@ impl SM75Instr {
Op::PopC(op) => si.encode_popc(&op),
Op::Brev(op) => si.encode_brev(&op),
Op::BFind(op) => si.encode_bfind(&op),
Op::Prmt(op) => si.encode_prmt(&op),
_ => panic!("Unhandled instruction"),
}

View file

@ -665,6 +665,57 @@ impl<'a> ShaderFromNir<'a> {
b.push_op(pcopy);
dst
}
nir_op_pack_half_2x16_split => {
assert!(alu.get_src(0).bit_size() == 32);
let low = b.alloc_ssa(RegFile::GPR, 1);
let high = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpF2F {
dst: low.into(),
src: srcs[0],
src_type: FloatType::F32,
dst_type: FloatType::F16,
rnd_mode: FRndMode::NearestEven,
ftz: false,
});
let src_bits = usize::from(alu.get_src(1).bit_size());
let src_type = FloatType::from_bits(src_bits);
assert!(matches!(src_type, FloatType::F32));
b.push_op(OpF2F {
dst: high.into(),
src: srcs[1],
src_type: FloatType::F32,
dst_type: FloatType::F16,
rnd_mode: FRndMode::NearestEven,
ftz: false,
});
let dst = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpPrmt {
dst: dst.into(),
srcs: [low.into(), high.into()],
selection: PrmtSelectionEval::from([
PrmtSelection {
src: PrmtSrc::Byte5,
sign_extend: false,
},
PrmtSelection {
src: PrmtSrc::Byte4,
sign_extend: false,
},
PrmtSelection {
src: PrmtSrc::Byte1,
sign_extend: false,
},
PrmtSelection {
src: PrmtSrc::Byte0,
sign_extend: false,
},
]),
});
dst
}
nir_op_u2f32 => {
assert!(alu.def.bit_size() == 32);
let dst = b.alloc_ssa(RegFile::GPR, 1);

View file

@ -3187,6 +3187,130 @@ impl fmt::Display for OpBFind {
}
}
#[derive(Copy, Clone, Debug)]
pub enum PrmtSrc {
Byte0 = 0,
Byte1 = 1,
Byte2 = 2,
Byte3 = 3,
Byte4 = 4,
Byte5 = 5,
Byte6 = 6,
Byte7 = 7,
}
impl TryFrom<u32> for PrmtSrc {
type Error = String;
fn try_from(value: u32) -> Result<Self, Self::Error> {
match value {
0 => Ok(Self::Byte0),
1 => Ok(Self::Byte1),
2 => Ok(Self::Byte2),
3 => Ok(Self::Byte3),
4 => Ok(Self::Byte4),
5 => Ok(Self::Byte5),
6 => Ok(Self::Byte6),
7 => Ok(Self::Byte7),
_ => Err(format!("Invalid value {}", value)),
}
}
}
#[derive(Copy, Clone, Debug)]
pub struct PrmtSelection {
pub src: PrmtSrc,
pub sign_extend: bool,
}
impl From<PrmtSelectionEval> for [PrmtSelection; 4] {
fn from(value: PrmtSelectionEval) -> Self {
let sel0 = value.0 & 0x7;
let sel1 = (value.0 & 0x70) >> 4;
let sel2 = (value.0 & 0x700) >> 8;
let sel3 = (value.0 & 0x7000) >> 12;
let sign0 = value.0 & 0x8;
let sign1 = value.0 & 0x80;
let sign2 = value.0 & 0x800;
let sign3 = value.0 & 0x8000;
[
PrmtSelection {
src: sel3.try_into().unwrap(),
sign_extend: sign3 != 0,
},
PrmtSelection {
src: sel2.try_into().unwrap(),
sign_extend: sign2 != 0,
},
PrmtSelection {
src: sel1.try_into().unwrap(),
sign_extend: sign1 != 0,
},
PrmtSelection {
src: sel0.try_into().unwrap(),
sign_extend: sign0 != 0,
},
]
}
}
#[derive(Copy, Clone, Debug)]
pub struct PrmtSelectionEval(u32);
impl PrmtSelectionEval {
pub fn inner(&self) -> u32 {
self.0
}
}
impl From<[PrmtSelection; 4]> for PrmtSelectionEval {
fn from(selections: [PrmtSelection; 4]) -> Self {
let mut selection = 0;
for v in selections {
let src = if v.sign_extend {
v.src as u32 | 0x8
} else {
v.src as u32
};
selection = selection << 4 | src;
}
Self(selection)
}
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
/// Permutes `srcs` into `dst` using `selection`.
pub struct OpPrmt {
pub dst: Dst,
#[src_type(ALU)]
pub srcs: [Src; 2],
pub selection: PrmtSelectionEval,
}
impl fmt::Display for OpPrmt {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let sel: [PrmtSelection; 4] = self.selection.into();
write!(
f,
"PRMT {}, {} [{:?}, {:?}, {:?}, {:?}], {}",
self.dst,
self.srcs[0],
sel[0].src,
sel[1].src,
sel[2].src,
sel[3].src,
self.srcs[1],
)
}
}
#[derive(Display, DstsAsSlice, SrcsAsSlice, FromVariants)]
pub enum Op {
FAdd(OpFAdd),
@ -3244,6 +3368,7 @@ pub enum Op {
PopC(OpPopC),
Brev(OpBrev),
BFind(OpBFind),
Prmt(OpPrmt),
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
@ -3560,7 +3685,7 @@ impl Instr {
| Op::FSOut(_) => {
panic!("Not a hardware opcode")
}
Op::PopC(_) | Op::Brev(_) | Op::BFind(_) => Some(15),
Op::PopC(_) | Op::Brev(_) | Op::BFind(_) | Op::Prmt(_) => Some(15),
}
}
}

View file

@ -189,7 +189,8 @@ impl<'a> LegalizeInstr<'a> {
| Op::FRnd(_)
| Op::PopC(_)
| Op::Brev(_)
| Op::BFind(_) => (),
| Op::BFind(_)
| Op::Prmt(_) => (),
Op::Sel(op) => {
let [ref mut src0, ref mut src1] = op.srcs;
if !src_is_reg(src0) && src_is_reg(src1) {