From d1c56b12b9cc574f4ae61aed67dd621198dbf629 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Wed, 26 Apr 2023 17:40:29 -0500
Subject: [PATCH] nak: Implement nir_intrinsic_global_atomic_*

Part-of:
---
 src/nouveau/compiler/nak_encode_sm75.rs |  87 +++++++++++++
 src/nouveau/compiler/nak_from_nir.rs    |  73 +++++++++++
 src/nouveau/compiler/nak_ir.rs          | 155 ++++++++++++++++++++++++
 3 files changed, 315 insertions(+)

diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs
index 25eb73a44c6..b8f94ab1f6e 100644
--- a/src/nouveau/compiler/nak_encode_sm75.rs
+++ b/src/nouveau/compiler/nak_encode_sm75.rs
@@ -1153,6 +1153,92 @@ impl SM75Instr {
         }
     }
 
+    /// Writes the 4-bit encoding of `atom_op` into the field `range` of this
+    /// instruction.
+    ///
+    /// Panics if `range` is not exactly 4 wide.
+    fn set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp) {
+        assert!(range.len() == 4);
+        self.set_field(
+            range,
+            match atom_op {
+                AtomOp::Add => 0_u8,
+                AtomOp::Min => 1_u8,
+                AtomOp::Max => 2_u8,
+                AtomOp::Inc => 3_u8,
+                AtomOp::Dec => 4_u8,
+                AtomOp::And => 5_u8,
+                AtomOp::Or => 6_u8,
+                AtomOp::Xor => 7_u8,
+                AtomOp::Exch => 8_u8,
+            },
+        );
+    }
+
+    /// Writes the 3-bit encoding of `atom_type` into the field `range` of
+    /// this instruction.
+    ///
+    /// Panics if `range` is not exactly 3 wide.
+    fn set_atom_type(&mut self, range: Range<usize>, atom_type: AtomType) {
+        assert!(range.len() == 3);
+        self.set_field(
+            range,
+            match atom_type {
+                AtomType::U32 => 0_u8,
+                AtomType::I32 => 1_u8,
+                AtomType::U64 => 2_u8,
+                AtomType::F32 => 3_u8,
+                AtomType::F16x2 => 4_u8,
+                AtomType::I64 => 5_u8,
+                AtomType::F64 => 6_u8,
+            },
+        );
+    }
+
+    /// Encodes `op` as a global-memory atomic instruction (opcode 0x38a).
+    ///
+    /// Called by `encode_atom` when `op.mem_space` is `MemSpace::Global`.
+    fn encode_atomg(&mut self, op: &OpAtom) {
+        self.set_opcode(0x38a);
+
+        self.set_dst(op.dst);
+        self.set_pred_dst(81..84, Dst::None);
+
+        self.set_reg_src(24..32, op.addr);
+        self.set_reg_src(32..40, op.data);
+        self.set_field(40..64, op.addr_offset);
+
+        self.set_field(
+            72..73,
+            match op.addr_type {
+                MemAddrType::A32 => 0_u8,
+                MemAddrType::A64 => 1_u8,
+            },
+        );
+
+        self.set_atom_type(73..76, op.atom_type);
+        self.set_mem_order_scope(&op.mem_order, &op.mem_scope);
+        self.set_atom_op(87..91, op.atom_op);
+    }
+
+    /// Placeholder for shared-memory atomics; always panics because the
+    /// encoding has not been implemented yet.
+    fn encode_atoms(&mut self, _op: &OpAtom) {
+        panic!("Shared atomic ops not yet implemented");
+    }
+
+    /// Encodes an atomic operation, dispatching on `op.mem_space`.
+    ///
+    /// Panics for `MemSpace::Local` and, until `encode_atoms` is
+    /// implemented, for `MemSpace::Shared`.
+    fn encode_atom(&mut self, op: &OpAtom) {
+        match op.mem_space {
+            MemSpace::Global => self.encode_atomg(op),
+            MemSpace::Local => panic!("Atomics do not support local"),
+            MemSpace::Shared => self.encode_atoms(op),
+        }
+    }
+
     fn encode_ald(&mut self, op: &OpALd) {
         self.set_opcode(0x321);
 
@@ -1335,6 +1421,7 @@ impl SM75Instr {
             Op::SuSt(op) => si.encode_sust(&op),
             Op::Ld(op) => si.encode_ld(&op),
             Op::St(op) => si.encode_st(&op),
+            Op::Atom(op) => si.encode_atom(&op),
             Op::ALd(op) => si.encode_ald(&op),
             Op::ASt(op) => si.encode_ast(&op),
             Op::Ipa(op) => si.encode_ipa(&op),
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 4029176113e..3162e557337 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -778,6 +778,54 @@ impl<'a> ShaderFromNir<'a> {
         }
     }
 
+    /// Picks the `AtomType` for a NIR atomic intrinsic from its atomic op and
+    /// the bit size of `intrin.def`.
+    ///
+    /// Signed min/max map to signed integer types, the remaining integer ops
+    /// to unsigned types and the float ops to float types.  Panics on any
+    /// other NIR atomic op, or if `AtomType` rejects the bit size.
+    fn get_atomic_type(&self, intrin: &nir_intrinsic_instr) -> AtomType {
+        let bit_size = intrin.def.bit_size();
+        match intrin.atomic_op() {
+            nir_atomic_op_iadd => AtomType::U(bit_size),
+            nir_atomic_op_imin => AtomType::I(bit_size),
+            nir_atomic_op_umin => AtomType::U(bit_size),
+            nir_atomic_op_imax => AtomType::I(bit_size),
+            nir_atomic_op_umax => AtomType::U(bit_size),
+            nir_atomic_op_iand => AtomType::U(bit_size),
+            nir_atomic_op_ior => AtomType::U(bit_size),
+            nir_atomic_op_ixor => AtomType::U(bit_size),
+            nir_atomic_op_xchg => AtomType::U(bit_size),
+            nir_atomic_op_fadd => AtomType::F(bit_size),
+            nir_atomic_op_fmin => AtomType::F(bit_size),
+            nir_atomic_op_fmax => AtomType::F(bit_size),
+            _ => panic!("Unsupported NIR atomic op"),
+        }
+    }
+
+    /// Picks the `AtomOp` for a NIR atomic intrinsic.
+    ///
+    /// Signed, unsigned and float variants of an operation share one
+    /// `AtomOp`; the distinction is carried by `get_atomic_type`.  Panics on
+    /// any NIR atomic op not listed here.
+    fn get_atomic_op(&self, intrin: &nir_intrinsic_instr) -> AtomOp {
+        match intrin.atomic_op() {
+            nir_atomic_op_iadd => AtomOp::Add,
+            nir_atomic_op_imin => AtomOp::Min,
+            nir_atomic_op_umin => AtomOp::Min,
+            nir_atomic_op_imax => AtomOp::Max,
+            nir_atomic_op_umax => AtomOp::Max,
+            nir_atomic_op_iand => AtomOp::And,
+            nir_atomic_op_ior => AtomOp::Or,
+            nir_atomic_op_ixor => AtomOp::Xor,
+            nir_atomic_op_xchg => AtomOp::Exch,
+            nir_atomic_op_fadd => AtomOp::Add,
+            nir_atomic_op_fmin => AtomOp::Min,
+            nir_atomic_op_fmax => AtomOp::Max,
+            _ => panic!("Unsupported NIR atomic op"),
+        }
+    }
+
     fn get_image_dim(&mut self, intrin: &nir_intrinsic_instr) -> ImageDim {
         let is_array = intrin.image_array();
         let image_dim = intrin.image_dim();
@@ -856,6 +904,31 @@ impl<'a> ShaderFromNir<'a> {
                     data: data,
                 }));
             }
+            nir_intrinsic_global_atomic => {
+                // 24 is presumably the offset width in bits: the SM75 encoder
+                // stores `addr_offset` in the 24-wide field 40..64.
+                let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
+                let data = self.get_src(&srcs[1]);
+                let atom_type = self.get_atomic_type(intrin);
+                let atom_op = self.get_atomic_op(intrin);
+                let dst = self.get_dst(&intrin.def);
+
+                let atom = OpAtom {
+                    dst: dst,
+                    addr: addr,
+                    data: data,
+                    atom_op: atom_op,
+                    atom_type: atom_type,
+                    addr_type: MemAddrType::A64,
+                    addr_offset: offset,
+                    mem_space: MemSpace::Global,
+                    // NOTE(review): order and scope are fixed here, not
+                    // derived from the intrinsic.
+                    mem_order: MemOrder::Strong,
+                    mem_scope: MemScope::System,
+                };
+                self.instrs.push(atom.into());
+            }
             nir_intrinsic_load_barycentric_centroid => (),
             nir_intrinsic_load_barycentric_pixel => (),
             nir_intrinsic_load_barycentric_sample => (),
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index fcb66763419..2769d20fc7b 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -1416,6 +1416,99 @@ impl fmt::Display for MemAccess {
     }
 }
 
+/// Data type an atomic operation works on.
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub enum AtomType {
+    F16x2,
+    U32,
+    I32,
+    F32,
+    U64,
+    I64,
+    F64,
+}
+
+impl AtomType {
+    /// Returns the float atomic type that is `bits` wide.
+    ///
+    /// Panics for 16 (not yet supported) and for any width other than 32
+    /// or 64.
+    pub fn F(bits: u8) -> AtomType {
+        match bits {
+            16 => panic!("16-bit float atomics not yet supported"),
+            32 => AtomType::F32,
+            64 => AtomType::F64,
+            _ => panic!("Invalid float atomic type"),
+        }
+    }
+
+    /// Returns the unsigned integer atomic type that is `bits` wide.
+    ///
+    /// Panics unless `bits` is 32 or 64.
+    pub fn U(bits: u8) -> AtomType {
+        match bits {
+            32 => AtomType::U32,
+            64 => AtomType::U64,
+            _ => panic!("Invalid uint atomic type"),
+        }
+    }
+
+    /// Returns the signed integer atomic type that is `bits` wide.
+    ///
+    /// Panics unless `bits` is 32 or 64.
+    pub fn I(bits: u8) -> AtomType {
+        match bits {
+            32 => AtomType::I32,
+            64 => AtomType::I64,
+            _ => panic!("Invalid int atomic type"),
+        }
+    }
+}
+
+impl fmt::Display for AtomType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            AtomType::F16x2 => write!(f, "F16x2"),
+            AtomType::U32 => write!(f, "U32"),
+            AtomType::I32 => write!(f, "I32"),
+            AtomType::F32 => write!(f, "F32"),
+            AtomType::U64 => write!(f, "U64"),
+            AtomType::I64 => write!(f, "I64"),
+            AtomType::F64 => write!(f, "F64"),
+        }
+    }
+}
+
+/// Operation performed by an atomic instruction.
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub enum AtomOp {
+    Add,
+    Min,
+    Max,
+    Inc,
+    Dec,
+    And,
+    Or,
+    Xor,
+    Exch,
+}
+
+impl fmt::Display for AtomOp {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            AtomOp::Add => write!(f, "ADD"),
+            AtomOp::Min => write!(f, "MIN"),
+            AtomOp::Max => write!(f, "MAX"),
+            AtomOp::Inc => write!(f, "INC"),
+            AtomOp::Dec => write!(f, "DEC"),
+            AtomOp::And => write!(f, "AND"),
+            AtomOp::Or => write!(f, "OR"),
+            AtomOp::Xor => write!(f, "XOR"),
+            AtomOp::Exch => write!(f, "EXCH"),
+        }
+    }
+}
+
 #[derive(Clone, Copy, Eq, PartialEq)]
 pub enum InterpFreq {
     Pass,
@@ -2272,6 +2365,65 @@ impl fmt::Display for OpSt {
     }
 }
 
+/// An atomic memory operation.
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpAtom {
+    // Destination written by the instruction.
+    pub dst: Dst,
+
+    // Base address.
+    #[src_type(GPR)]
+    pub addr: Src,
+
+    // Data operand of the operation.
+    #[src_type(SSA)]
+    pub data: Src,
+
+    pub atom_op: AtomOp,
+    pub atom_type: AtomType,
+
+    pub addr_type: MemAddrType,
+    // Signed constant offset, displayed as added to `addr`.
+    pub addr_offset: i32,
+
+    pub mem_space: MemSpace,
+    pub mem_order: MemOrder,
+    pub mem_scope: MemScope,
+}
+
+impl fmt::Display for OpAtom {
+    /// Writes `ATOM.<op>.<type>.<order>.<scope> <dst> [<addr>+<off>] <data>`.
+    ///
+    /// A zero address or offset is omitted; negative offsets print as `-<off>`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "ATOM.{}.{}.{}.{} {}",
+            self.atom_op,
+            self.atom_type,
+            self.mem_order,
+            self.mem_scope,
+            self.dst
+        )?;
+        write!(f, " [")?;
+        if !self.addr.is_zero() {
+            write!(f, "{}", self.addr)?;
+        }
+        if self.addr_offset != 0 {
+            // `addr_offset` is signed: print the sign explicitly instead of
+            // letting `{:#x}` show a negative value's two's complement.
+            if self.addr_offset < 0 {
+                write!(f, "-")?;
+            } else if !self.addr.is_zero() {
+                write!(f, "+")?;
+            }
+            write!(f, "{:#x}", self.addr_offset.unsigned_abs())?;
+        }
+        write!(f, "] {}", self.data)
+    }
+}
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpALd {
@@ -2752,6 +2904,7 @@ pub enum Op {
     SuSt(OpSuSt),
     Ld(OpLd),
     St(OpSt),
+    Atom(OpAtom),
     ALd(OpALd),
     ASt(OpASt),
     Ipa(OpIpa),
@@ -3219,6 +3372,7 @@ impl Instr {
             Op::ASt(_)
             | Op::SuSt(_)
             | Op::St(_)
+            | Op::Atom(_)
             | Op::MemBar(_)
             | Op::Bra(_)
             | Op::Exit(_)
@@ -3263,6 +3417,7 @@ impl Instr {
             Op::SuSt(_) => None,
             Op::Ld(_) => None,
             Op::St(_) => None,
+            Op::Atom(_) => None,
             Op::MemBar(_) => None,
             Op::Bar(_) => None,
             Op::Bra(_) | Op::Exit(_) => Some(15),