diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index bcda1dd1431..68c2acf1b93 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -1874,7 +1874,11 @@ impl<'a> ShaderFromNir<'a> { } } - fn get_atomic_op(&self, intrin: &nir_intrinsic_instr) -> AtomOp { + fn get_atomic_op( + &self, + intrin: &nir_intrinsic_instr, + cmp_src: AtomCmpSrc, + ) -> AtomOp { match intrin.atomic_op() { nir_atomic_op_iadd => AtomOp::Add, nir_atomic_op_imin => AtomOp::Min, @@ -1888,7 +1892,7 @@ impl<'a> ShaderFromNir<'a> { nir_atomic_op_fadd => AtomOp::Add, nir_atomic_op_fmin => AtomOp::Min, nir_atomic_op_fmax => AtomOp::Max, - nir_atomic_op_cmpxchg => AtomOp::CmpExch, + nir_atomic_op_cmpxchg => AtomOp::CmpExch(cmp_src), _ => panic!("Unsupported NIR atomic op"), } } @@ -2137,7 +2141,7 @@ impl<'a> ShaderFromNir<'a> { let coord = self.get_image_coord(intrin, dim); // let sample = self.get_src(&srcs[2]); let atom_type = self.get_atomic_type(intrin); - let atom_op = self.get_atomic_op(intrin); + let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Packed); assert!( intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64 @@ -2331,7 +2335,7 @@ impl<'a> ShaderFromNir<'a> { let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); let data = self.get_src(&srcs[1]); let atom_type = self.get_atomic_type(intrin); - let atom_op = self.get_atomic_op(intrin); + let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate); assert!(intrin.def.num_components() == 1); let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32)); @@ -2366,7 +2370,7 @@ impl<'a> ShaderFromNir<'a> { addr: addr, cmpr: cmpr, data: data, - atom_op: AtomOp::CmpExch, + atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate), atom_type: atom_type, addr_offset: offset, mem_space: MemSpace::Global(MemAddrType::A64), @@ -2837,7 +2841,7 @@ impl<'a> ShaderFromNir<'a> { let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); let data = self.get_src(&srcs[1]); let atom_type = self.get_atomic_type(intrin); - let atom_op = self.get_atomic_op(intrin); + let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate); assert!(intrin.def.num_components() == 1); let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32)); @@ -2872,7 +2876,7 @@ impl<'a> ShaderFromNir<'a> { addr: addr, cmpr: cmpr, data: data, - atom_op: AtomOp::CmpExch, + atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate), atom_type: atom_type, addr_offset: offset, mem_space: MemSpace::Shared, diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 70f6076101c..54547dc382b 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -2239,6 +2239,14 @@ impl fmt::Display for AtomType { } } +#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub enum AtomCmpSrc { + /// The cmpr value is passed as a separate source + Separate, + /// The cmpr value is packed in with the data with cmpr coming first + Packed, +} + #[allow(dead_code)] #[derive(Clone, Copy, Eq, Hash, PartialEq)] pub enum AtomOp { @@ -2251,7 +2259,7 @@ pub enum AtomOp { Or, Xor, Exch, - CmpExch, + CmpExch(AtomCmpSrc), } impl fmt::Display for AtomOp { @@ -2266,7 +2274,8 @@ impl fmt::Display for AtomOp { AtomOp::Or => write!(f, ".or"), AtomOp::Xor => write!(f, ".xor"), AtomOp::Exch => write!(f, ".exch"), - AtomOp::CmpExch => write!(f, ".cmpexch"), + AtomOp::CmpExch(AtomCmpSrc::Separate) => write!(f, ".cmpexch"), + AtomOp::CmpExch(AtomCmpSrc::Packed) => write!(f, ".cmpexch.packed"), } } } @@ -4369,7 +4378,11 @@ impl DisplayOp for OpAtom { } write!(f, "{:#x}", self.addr_offset)?; } - write!(f, "] {}", self.data) + write!(f, "]")?; + if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) { + write!(f, " {}", self.cmpr)?; + } + write!(f, " {}", self.data) } } impl_display_for_op!(OpAtom); diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index e63285c7a42..35321b62382 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -2213,7 +2213,7 @@ impl SM50Encoder<'_> { AtomOp::Or => 6_u8, AtomOp::Xor => 7_u8, AtomOp::Exch => 8_u8, - AtomOp::CmpExch => panic!("CmpXchg not yet supported"), + AtomOp::CmpExch(_) => panic!("CmpXchg not yet supported"), }, ); } @@ -2228,8 +2228,9 @@ impl SM50Op for OpSuAtom { } fn encode(&self, e: &mut SM50Encoder<'_>) { - if matches!(self.atom_op, AtomOp::CmpExch) { + if let AtomOp::CmpExch(cmp_src) = self.atom_op { e.set_opcode(0xeac0); + assert!(cmp_src == AtomCmpSrc::Packed); } else { e.set_opcode(0xea60); } @@ -2253,7 +2254,7 @@ impl SM50Op for OpSuAtom { AtomOp::Or => 6, AtomOp::Xor => 7, AtomOp::Exch => 8, - AtomOp::CmpExch => 0, + AtomOp::CmpExch(_) => 0, }; e.set_image_dim(33..36, self.image_dim); diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs index 11d00d68048..46fd74ef467 100644 --- a/src/nouveau/compiler/nak/sm70.rs +++ b/src/nouveau/compiler/nak/sm70.rs @@ -2585,8 +2585,9 @@ impl SM70Op for OpSuAtom { } fn encode(&self, e: &mut SM70Encoder<'_>) { - if matches!(self.atom_op, AtomOp::CmpExch) { + if let AtomOp::CmpExch(cmp_src) = self.atom_op { e.set_opcode(0x396); + assert!(cmp_src == AtomCmpSrc::Packed); } else { e.set_opcode(0x394); }; @@ -2759,7 +2760,7 @@ impl SM70Encoder<'_> { self.set_field( range, match atom_op { - AtomOp::Add | AtomOp::CmpExch => 0_u8, + AtomOp::Add | AtomOp::CmpExch(_) => 0_u8, AtomOp::Min => 1_u8, AtomOp::Max => 2_u8, AtomOp::Inc => 3_u8, @@ -2797,9 +2798,10 @@ impl SM70Op for OpAtom { fn encode(&self, e: &mut SM70Encoder<'_>) { match self.mem_space { MemSpace::Global(_) => { - if self.atom_op == AtomOp::CmpExch { + if let AtomOp::CmpExch(cmp_src) = self.atom_op { e.set_opcode(0x3a9); + assert!(cmp_src == AtomCmpSrc::Separate); e.set_reg_src(32..40, self.cmpr); e.set_reg_src(64..72, self.data); } else { @@ -2824,9 +2826,10 @@ impl SM70Op for OpAtom { } MemSpace::Local => panic!("Atomics do not support local"), MemSpace::Shared => { - if self.atom_op == AtomOp::CmpExch { + if let AtomOp::CmpExch(cmp_src) = self.atom_op { e.set_opcode(0x38d); + assert!(cmp_src == AtomCmpSrc::Separate); e.set_reg_src(32..40, self.cmpr); e.set_reg_src(64..72, self.data); } else {