diff --git a/src/nouveau/compiler/nak/nvdisasm_tests.rs b/src/nouveau/compiler/nak/nvdisasm_tests.rs index b9ef8f9252c..cdd6839fdb8 100644 --- a/src/nouveau/compiler/nak/nvdisasm_tests.rs +++ b/src/nouveau/compiler/nak/nvdisasm_tests.rs @@ -306,6 +306,18 @@ pub fn test_ld_st_atom() { (AtomType::F64, ".f64.rn"), ]; + let atom_ops = [ + AtomOp::Add, + AtomOp::Min, + AtomOp::Max, + AtomOp::Inc, + AtomOp::Dec, + AtomOp::And, + AtomOp::Or, + AtomOp::Xor, + AtomOp::Exch, + ]; + let spaces = [ MemSpace::Global(MemAddrType::A64), MemSpace::Shared, @@ -416,55 +428,71 @@ pub fn test_ld_st_atom() { c.push(instr, expected); for (atom_type, atom_type_str) in atom_types { - for use_dst in [true, false] { - let instr = OpAtom { - dst: if use_dst { - Dst::Reg(r0) - } else { - Dst::None - }, - addr: SrcRef::Reg(r4_64).into(), - uniform_address: urz.clone(), - data: SrcRef::Reg(r2).into(), - atom_op: AtomOp::Add, - cmpr: SrcRef::Reg(r3).into(), - atom_type, + let active_atom_ops = if atom_type.is_float() { + &atom_ops[0..3] + } else { + &atom_ops[..] + }; - addr_offset, - addr_stride: addr_stride, + for atom_op in active_atom_ops { + for use_dst in [true, false] { + if !use_dst && *atom_op == AtomOp::Exch { + continue; + } - mem_space: space, - mem_order: order, - mem_eviction_priority: pri, - }; - - let expected = match space { - MemSpace::Global(_) => { - let op = if use_dst { - "atomg" - } else if sm >= 90 { - "redg" + let instr = OpAtom { + dst: if use_dst { + Dst::Reg(r0) } else { - "red" - }; - let dst = - if use_dst { "pt, r0, " } else { "" }; - format!("{op}.e.add.ef{atom_type_str}.strong.{cta} {dst}[{r4_64_str}{uniform_addr}+{addr_offset_str}], r2;") - } - MemSpace::Shared => { - if atom_type.is_float() { - continue; - } - if atom_type.bits() == 64 { - continue; - } - let dst = if use_dst { "r0" } else { "rz" }; - format!("atoms.add{atom_type_str} {dst}, [{r4_64_str}{addr_stride}{uniform_addr}+{addr_offset_str}], r2;") - } - MemSpace::Local => continue, - }; + Dst::None + }, + addr: SrcRef::Reg(r4_64).into(), + uniform_address: urz.clone(), + data: SrcRef::Reg(r2).into(), + atom_op: *atom_op, + cmpr: SrcRef::Reg(r3).into(), + atom_type, - c.push(instr, expected); + addr_offset, + addr_stride: addr_stride, + + mem_space: space, + mem_order: order, + mem_eviction_priority: pri, + }; + + let expected = match space { + MemSpace::Global(_) => { + let op = if use_dst { + "atomg" + } else if sm >= 90 { + "redg" + } else { + "red" + }; + let dst = if use_dst { + "pt, r0, " + } else { + "" + }; + format!("{op}.e{atom_op}.ef{atom_type_str}.strong.{cta} {dst}[{r4_64_str}{uniform_addr}+{addr_offset_str}], r2;") + } + MemSpace::Shared => { + if atom_type.is_float() { + continue; + } + if atom_type.bits() == 64 { + continue; + } + let dst = + if use_dst { "r0" } else { "rz" }; + format!("atoms{atom_op}{atom_type_str} {dst}, [{r4_64_str}{addr_stride}{uniform_addr}+{addr_offset_str}], r2;") + } + MemSpace::Local => continue, + }; + + c.push(instr, expected); + } } } } diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs index 438392f6e6d..88f050e4187 100644 --- a/src/nouveau/compiler/nak/sm70_encode.rs +++ b/src/nouveau/compiler/nak/sm70_encode.rs @@ -3480,6 +3480,19 @@ impl SM70Encoder<'_> { ); } + fn set_atom_op_sm90_float(&mut self, range: Range, atom_op: AtomOp) { + assert!(self.sm >= 90); + self.set_field( + range, + match atom_op { + AtomOp::Add => 0_u8, + AtomOp::Min => 2_u8, + AtomOp::Max => 4_u8, + _ => panic!("Unsupported float atomic"), + }, + ); + } + fn set_atom_type(&mut self, atom_type: AtomType, su: bool) { if self.sm >= 90 && !su { // Float/int is differentiated by opcode @@ -3548,13 +3561,14 @@ impl SM70Op for OpAtom { if self.dst.is_none() { if e.sm >= 90 && self.atom_type.is_float() { e.set_opcode(0x9a6); + e.set_atom_op_sm90_float(87..90, self.atom_op); } else { e.set_opcode(0x98e); + e.set_atom_op(87..90, self.atom_op); } e.set_reg_src(32..40, &self.data); e.set_field(40..64, self.addr_offset); - e.set_atom_op(87..90, self.atom_op); if has_ugpr { e.set_reg_addr(24..32, &self.addr, 90); e.set_ureg_addr(64, &self.uniform_address, 72); @@ -3576,10 +3590,14 @@ impl SM70Op for OpAtom { } else { if e.sm >= 90 && self.atom_type.is_float() { e.set_opcode(0x9a3); - } else if has_ugpr { - e.set_opcode(0x9a8); + e.set_atom_op_sm90_float(87..91, self.atom_op); } else { - e.set_opcode(0x3a8); + if has_ugpr { + e.set_opcode(0x9a8); + } else { + e.set_opcode(0x3a8); + } + e.set_atom_op(87..91, self.atom_op); } if e.sm >= 100 { @@ -3601,7 +3619,6 @@ impl SM70Op for OpAtom { e.set_reg_src(32..40, &self.data); e.set_pred_dst(81..84, &Dst::None); - e.set_atom_op(87..91, self.atom_op); e.set_bit(91, has_ugpr); }