mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-23 22:40:34 +01:00
nak/sm20: Add more memory ops
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34616>
This commit is contained in:
parent
84f18f31ad
commit
5a140e7c3e
1 changed file with 179 additions and 0 deletions
|
|
@ -1862,6 +1862,119 @@ impl SM20Op for OpSt {
|
|||
}
|
||||
}
|
||||
|
||||
fn atom_src_as_ssa(
|
||||
b: &mut LegalizeBuilder,
|
||||
src: Src,
|
||||
atom_type: AtomType,
|
||||
) -> SSARef {
|
||||
if let Some(ssa) = src.as_ssa() {
|
||||
return *ssa;
|
||||
}
|
||||
|
||||
let tmp;
|
||||
if atom_type.bits() == 32 {
|
||||
tmp = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.copy_to(tmp.into(), 0.into());
|
||||
} else {
|
||||
debug_assert!(atom_type.bits() == 64);
|
||||
tmp = b.alloc_ssa(RegFile::GPR, 2);
|
||||
b.copy_to(tmp[0].into(), 0.into());
|
||||
b.copy_to(tmp[1].into(), 0.into());
|
||||
}
|
||||
tmp
|
||||
}
|
||||
|
||||
impl SM20Op for OpAtom {
|
||||
fn legalize(&mut self, b: &mut LegalizeBuilder) {
|
||||
if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
|
||||
let cmpr = atom_src_as_ssa(b, self.cmpr, self.atom_type);
|
||||
let data = atom_src_as_ssa(b, self.data, self.atom_type);
|
||||
|
||||
let mut cmpr_data = Vec::new();
|
||||
cmpr_data.extend_from_slice(&cmpr);
|
||||
cmpr_data.extend_from_slice(&data);
|
||||
let cmpr_data = SSARef::try_from(cmpr_data).unwrap();
|
||||
|
||||
self.cmpr = 0.into();
|
||||
self.data = cmpr_data.into();
|
||||
self.atom_op = AtomOp::CmpExch(AtomCmpSrc::Packed);
|
||||
}
|
||||
legalize_ext_instr(self, b);
|
||||
}
|
||||
|
||||
fn encode(&self, e: &mut SM20Encoder<'_>) {
|
||||
let MemSpace::Global(addr_type) = self.mem_space else {
|
||||
panic!("SM20 only supports global atomics");
|
||||
};
|
||||
assert!(addr_type == MemAddrType::A64);
|
||||
|
||||
if self.dst.is_none() {
|
||||
e.set_opcode(SM20Unit::Mem, 0x1);
|
||||
} else {
|
||||
e.set_opcode(SM20Unit::Mem, 0x11);
|
||||
}
|
||||
|
||||
let op = match self.atom_op {
|
||||
AtomOp::Add => 0_u8,
|
||||
AtomOp::Min => 1_u8,
|
||||
AtomOp::Max => 2_u8,
|
||||
AtomOp::Inc => 3_u8,
|
||||
AtomOp::Dec => 4_u8,
|
||||
AtomOp::And => 5_u8,
|
||||
AtomOp::Or => 6_u8,
|
||||
AtomOp::Xor => 7_u8,
|
||||
AtomOp::Exch => 8_u8,
|
||||
AtomOp::CmpExch(_) => 9_u8,
|
||||
};
|
||||
e.set_field(5..9, op);
|
||||
|
||||
let typ = match self.atom_type {
|
||||
AtomType::F16x2 => panic!("Unsupported atomic type"),
|
||||
// AtomType::U8 => 0x0_u8,
|
||||
// AtomType::I8 => 0x1_u8,
|
||||
// AtomType::U16 => 0x2_u8,
|
||||
// AtomType::I16 => 0x3_u8,
|
||||
AtomType::U32 => 0x4_u8,
|
||||
AtomType::U64 => 0x5_u8,
|
||||
//AtomType::U128 => 0x6_u8,
|
||||
AtomType::I32 => 0x7_u8,
|
||||
AtomType::I64 => 0x8_u8,
|
||||
//AtomType::I128 => 0x9_u8,
|
||||
//AtomType::F16 => 0xa_u8,
|
||||
AtomType::F64 => 0xc_u8,
|
||||
AtomType::F32 => 0xd_u8,
|
||||
};
|
||||
e.set_field(9..10, typ & 0x1);
|
||||
e.set_field(59..62, typ >> 1);
|
||||
|
||||
e.set_reg_src(20..26, self.addr);
|
||||
e.set_reg_src(14..20, self.data);
|
||||
|
||||
if self.dst.is_none() {
|
||||
e.set_field(26..58, self.addr_offset);
|
||||
} else {
|
||||
e.set_dst(43..49, self.dst);
|
||||
e.set_field(26..43, self.addr_offset & 0x1ffff);
|
||||
e.set_field(55..58, self.addr_offset >> 17);
|
||||
}
|
||||
|
||||
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
|
||||
// The hardware expects the first source to be packed and then the
|
||||
// second source to be the top half of the first.
|
||||
assert!(cmp_src == AtomCmpSrc::Packed);
|
||||
let cmpr_data = self.data.src_ref.as_reg().unwrap();
|
||||
assert!(cmpr_data.comps() % 2 == 0);
|
||||
let data_comps = cmpr_data.comps() / 2;
|
||||
let data_idx = cmpr_data.base_idx() + u32::from(data_comps);
|
||||
let data = RegRef::new(cmpr_data.file(), data_idx, data_comps);
|
||||
|
||||
e.set_reg_src(49..55, data.into());
|
||||
} else if !self.dst.is_none() {
|
||||
e.set_reg_src(49..55, 0.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SM20Op for OpALd {
|
||||
fn legalize(&mut self, b: &mut LegalizeBuilder) {
|
||||
legalize_ext_instr(self, b);
|
||||
|
|
@ -1940,6 +2053,69 @@ impl SM20Op for OpIpa {
|
|||
}
|
||||
}
|
||||
|
||||
impl SM20Op for OpCCtl {
|
||||
fn legalize(&mut self, b: &mut LegalizeBuilder) {
|
||||
legalize_ext_instr(self, b);
|
||||
}
|
||||
|
||||
fn encode(&self, e: &mut SM20Encoder<'_>) {
|
||||
let op = match self.mem_space {
|
||||
MemSpace::Global(MemAddrType::A32) => 0x26,
|
||||
MemSpace::Global(MemAddrType::A64) => 0x27,
|
||||
MemSpace::Local => panic!("cctl does not support local"),
|
||||
MemSpace::Shared => 0x34,
|
||||
};
|
||||
e.set_opcode(SM20Unit::Mem, op);
|
||||
|
||||
e.set_field(
|
||||
5..10,
|
||||
match self.op {
|
||||
CCtlOp::Qry1 => 0_u8,
|
||||
CCtlOp::PF1 => 1_u8,
|
||||
CCtlOp::PF1_5 => 2_u8,
|
||||
CCtlOp::PF2 => 3_u8,
|
||||
CCtlOp::WB => 4_u8,
|
||||
CCtlOp::IV => 5_u8,
|
||||
CCtlOp::IVAll => 6_u8,
|
||||
CCtlOp::RS => 7_u8,
|
||||
CCtlOp::WBAll => 8_u8,
|
||||
CCtlOp::RSLB => 9_u8,
|
||||
CCtlOp::IVAllP | CCtlOp::WBAllP => {
|
||||
panic!("cctl{} is not supported on SM20", self.op);
|
||||
}
|
||||
},
|
||||
);
|
||||
e.set_dst(14..20, Dst::None);
|
||||
e.set_reg_src(20..26, self.addr);
|
||||
e.set_field(26..28, 0); // 1: .u, 2: .c: 3: .i
|
||||
|
||||
assert!(self.addr_offset % 4 == 0);
|
||||
if matches!(self.mem_space, MemSpace::Global(_)) {
|
||||
e.set_field(28..58, self.addr_offset / 4);
|
||||
} else {
|
||||
e.set_field(28..50, self.addr_offset / 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SM20Op for OpMemBar {
|
||||
fn legalize(&mut self, _b: &mut LegalizeBuilder) {
|
||||
// Nothing to do
|
||||
}
|
||||
|
||||
fn encode(&self, e: &mut SM20Encoder<'_>) {
|
||||
e.set_opcode(SM20Unit::Mem, 0x38);
|
||||
e.set_field(
|
||||
5..7,
|
||||
match self.scope {
|
||||
MemScope::CTA => 0_u8,
|
||||
MemScope::GPU => 1_u8,
|
||||
MemScope::System => 2_u8,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl SM20Encoder<'_> {
|
||||
fn set_rel_offset(&mut self, range: Range<usize>, label: &Label) {
|
||||
let ip = u32::try_from(self.ip).unwrap();
|
||||
|
|
@ -2199,9 +2375,12 @@ macro_rules! as_sm20_op_match {
|
|||
Op::Ld(op) => op,
|
||||
Op::Ldc(op) => op,
|
||||
Op::St(op) => op,
|
||||
Op::Atom(op) => op,
|
||||
Op::ALd(op) => op,
|
||||
Op::ASt(op) => op,
|
||||
Op::Ipa(op) => op,
|
||||
Op::CCtl(op) => op,
|
||||
Op::MemBar(op) => op,
|
||||
Op::Bra(op) => op,
|
||||
Op::SSy(op) => op,
|
||||
Op::Sync(op) => op,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue