From dfb62604831c0f17054ea7913382bfffa0522b04 Mon Sep 17 00:00:00 2001 From: M Henning Date: Fri, 20 Oct 2023 22:13:27 -0400 Subject: [PATCH] nak: Specify MemScope on MemOrder::Strong The scope isn't relevant on MemOrder::Weak. In fact, ptxas refuses any input that specifies a scope on weak loads. So, don't make those combinations representable in our IR. Part-of: --- src/nouveau/compiler/nak_encode_sm75.rs | 48 ++++++++++----------- src/nouveau/compiler/nak_from_nir.rs | 39 ++++++----------- src/nouveau/compiler/nak_ir.rs | 39 +++++------------ src/nouveau/compiler/nak_lower_copy_swap.rs | 6 +-- 4 files changed, 49 insertions(+), 83 deletions(-) diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs index 4e28244302d..551254636f9 100644 --- a/src/nouveau/compiler/nak_encode_sm75.rs +++ b/src/nouveau/compiler/nak_encode_sm75.rs @@ -1137,8 +1137,12 @@ impl SM75Instr { ); } - fn set_mem_order_scope(&mut self, order: &MemOrder, scope: &MemScope) { + fn set_mem_order(&mut self, order: &MemOrder) { if self.sm < 80 { + let scope = match order { + MemOrder::Weak => MemScope::CTA, + MemOrder::Strong(s) => *s, + }; self.set_field( 77..79, match scope { @@ -1153,18 +1157,18 @@ impl SM75Instr { match order { /* Constant => 0_u8, */ MemOrder::Weak => 1_u8, - MemOrder::Strong => 2_u8, + MemOrder::Strong(_) => 2_u8, /* MMIO => 3_u8, */ }, ); } else { self.set_field( 77..81, - match (order, scope) { - (MemOrder::Weak, _) => 0x0_u8, - (MemOrder::Strong, MemScope::CTA) => 0x5_u8, - (MemOrder::Strong, MemScope::GPU) => 0x7_u8, - (MemOrder::Strong, MemScope::System) => 0xa_u8, + match order { + MemOrder::Weak => 0x0_u8, + MemOrder::Strong(MemScope::CTA) => 0x5_u8, + MemOrder::Strong(MemScope::GPU) => 0x7_u8, + MemOrder::Strong(MemScope::System) => 0xa_u8, }, ); } @@ -1179,7 +1183,7 @@ impl SM75Instr { self.set_pred_dst(81..84, op.resident); self.set_image_dim(61..64, op.image_dim); - self.set_mem_order_scope(&op.mem_order, &op.mem_scope); + self.set_mem_order(&op.mem_order); assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf); self.set_field(72..76, op.mask); @@ -1193,7 +1197,7 @@ impl SM75Instr { self.set_reg_src(64..72, op.handle); self.set_image_dim(61..64, op.image_dim); - self.set_mem_order_scope(&op.mem_order, &op.mem_scope); + self.set_mem_order(&op.mem_order); assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf); self.set_field(72..76, op.mask); @@ -1213,7 +1217,7 @@ impl SM75Instr { self.set_pred_dst(81..84, op.resident); self.set_image_dim(61..64, op.image_dim); - self.set_mem_order_scope(&op.mem_order, &op.mem_scope); + self.set_mem_order(&op.mem_order); self.set_bit(72, false); /* .BA */ self.set_atom_type(73..76, op.atom_type); @@ -1245,7 +1249,7 @@ impl SM75Instr { }, ); self.set_mem_type(73..76, access.mem_type); - self.set_mem_order_scope(&access.order, &access.scope); + self.set_mem_order(&access.order); } fn encode_ldg(&mut self, op: &OpLd) { @@ -1268,8 +1272,7 @@ impl SM75Instr { assert!(op.access.addr_type == MemAddrType::A32); self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong); - assert!(op.access.scope == MemScope::CTA); + assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); } fn encode_lds(&mut self, op: &OpLd) { @@ -1281,8 +1284,7 @@ impl SM75Instr { assert!(op.access.addr_type == MemAddrType::A32); self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong); - assert!(op.access.scope == MemScope::CTA); + assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); self.set_bit(87, false); /* !.ZD - Returns a predicate? */ } @@ -1328,8 +1330,7 @@ impl SM75Instr { assert!(op.access.addr_type == MemAddrType::A32); self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong); - assert!(op.access.scope == MemScope::CTA); + assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); } fn encode_sts(&mut self, op: &OpSt) { @@ -1341,8 +1342,7 @@ impl SM75Instr { assert!(op.access.addr_type == MemAddrType::A32); self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong); - assert!(op.access.scope == MemScope::CTA); + assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); } fn encode_st(&mut self, op: &OpSt) { @@ -1406,7 +1406,7 @@ impl SM75Instr { ); self.set_atom_type(73..76, op.atom_type); - self.set_mem_order_scope(&op.mem_order, &op.mem_scope); + self.set_mem_order(&op.mem_order); self.set_atom_op(87..91, op.atom_op); } @@ -1430,7 +1430,7 @@ impl SM75Instr { ); self.set_atom_type(73..76, op.atom_type); - self.set_mem_order_scope(&op.mem_order, &op.mem_scope); + self.set_mem_order(&op.mem_order); } fn encode_atoms(&mut self, op: &OpAtom) { @@ -1443,8 +1443,7 @@ impl SM75Instr { self.set_field(40..64, op.addr_offset); assert!(op.addr_type == MemAddrType::A32); - assert!(op.mem_order == MemOrder::Strong); - assert!(op.mem_scope == MemScope::CTA); + assert!(op.mem_order == MemOrder::Strong(MemScope::CTA)); self.set_atom_type(73..76, op.atom_type); self.set_atom_op(87..91, op.atom_op); @@ -1461,8 +1460,7 @@ impl SM75Instr { self.set_reg_src(64..72, op.data); assert!(op.addr_type == MemAddrType::A32); - assert!(op.mem_order == MemOrder::Strong); - assert!(op.mem_scope == MemScope::CTA); + assert!(op.mem_order == MemOrder::Strong(MemScope::CTA)); self.set_atom_type(73..76, op.atom_type); } diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index 51257d417ad..d87542cc891 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -1432,8 +1432,7 @@ impl<'a> ShaderFromNir<'a> { atom_op: atom_op, atom_type: atom_type, image_dim: dim, - mem_order: MemOrder::Strong, - mem_scope: MemScope::System, + mem_order: MemOrder::Strong(MemScope::System), }); self.set_dst(&intrin.def, dst); } @@ -1453,8 +1452,7 @@ impl<'a> ShaderFromNir<'a> { dst: dst.into(), resident: Dst::None, image_dim: dim, - mem_order: MemOrder::Strong, - mem_scope: MemScope::System, + mem_order: MemOrder::Strong(MemScope::System), mask: (1 << comps) - 1, handle: handle, coord: coord, @@ -1474,8 +1472,7 @@ impl<'a> ShaderFromNir<'a> { b.push_op(OpSuSt { image_dim: dim, - mem_order: MemOrder::Strong, - mem_scope: MemScope::System, + mem_order: MemOrder::Strong(MemScope::System), mask: (1 << comps) - 1, handle: handle, coord: coord, @@ -1530,8 +1527,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A64, addr_offset: offset, mem_space: MemSpace::Global, - mem_order: MemOrder::Strong, - mem_scope: MemScope::System, + mem_order: MemOrder::Strong(MemScope::System), }); self.set_dst(&intrin.def, dst); } @@ -1555,8 +1551,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A64, addr_offset: offset, mem_space: MemSpace::Global, - mem_order: MemOrder::Strong, - mem_scope: MemScope::System, + mem_order: MemOrder::Strong(MemScope::System), }); self.set_dst(&intrin.def, dst); } @@ -1580,8 +1575,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A64, mem_type: MemType::from_size(size_B, false), space: MemSpace::Global, - order: MemOrder::Strong, - scope: MemScope::System, + order: MemOrder::Strong(MemScope::System), }; let (addr, offset) = self.get_io_addr_offset(&srcs[0], 32); let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4)); @@ -1761,8 +1755,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A32, mem_type: MemType::from_size(size_B, false), space: MemSpace::Local, - order: MemOrder::Strong, - scope: MemScope::CTA, + order: MemOrder::Strong(MemScope::CTA), }; let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4)); @@ -1783,8 +1776,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A32, mem_type: MemType::from_size(size_B, false), space: MemSpace::Shared, - order: MemOrder::Strong, - scope: MemScope::CTA, + order: MemOrder::Strong(MemScope::CTA), }; let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); let offset = offset + intrin.base(); @@ -1941,8 +1933,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A32, addr_offset: offset, mem_space: MemSpace::Shared, - mem_order: MemOrder::Strong, - mem_scope: MemScope::CTA, + mem_order: MemOrder::Strong(MemScope::CTA), }); self.set_dst(&intrin.def, dst); } @@ -1966,8 +1957,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A32, addr_offset: offset, mem_space: MemSpace::Shared, - mem_order: MemOrder::Strong, - mem_scope: MemScope::CTA, + mem_order: MemOrder::Strong(MemScope::CTA), }); self.set_dst(&intrin.def, dst); } @@ -1980,8 +1970,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A64, mem_type: MemType::from_size(size_B, false), space: MemSpace::Global, - order: MemOrder::Strong, - scope: MemScope::System, + order: MemOrder::Strong(MemScope::System), }; let (addr, offset) = self.get_io_addr_offset(&srcs[1], 32); @@ -2017,8 +2006,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A32, mem_type: MemType::from_size(size_B, false), space: MemSpace::Local, - order: MemOrder::Strong, - scope: MemScope::CTA, + order: MemOrder::Strong(MemScope::CTA), }; let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24); @@ -2038,8 +2026,7 @@ impl<'a> ShaderFromNir<'a> { addr_type: MemAddrType::A32, mem_type: MemType::from_size(size_B, false), space: MemSpace::Shared, - order: MemOrder::Strong, - scope: MemScope::CTA, + order: MemOrder::Strong(MemScope::CTA), }; let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24); let offset = offset + intrin.base(); diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 31a1d005ee2..c8beb84507a 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -1749,14 +1749,14 @@ impl fmt::Display for MemType { #[derive(Clone, Copy, Eq, Hash, PartialEq)] pub enum MemOrder { Weak, - Strong, + Strong(MemScope), } impl fmt::Display for MemOrder { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { MemOrder::Weak => write!(f, "WEAK"), - MemOrder::Strong => write!(f, "STRONG"), + MemOrder::Strong(scope) => write!(f, "STRONG.{}", scope), } } } @@ -1801,7 +1801,6 @@ pub struct MemAccess { pub mem_type: MemType, pub space: MemSpace, pub order: MemOrder, - pub scope: MemScope, } impl fmt::Display for MemAccess { @@ -1809,7 +1808,7 @@ impl fmt::Display for MemAccess { write!( f, "{}.{}.{}.{}", - self.addr_type, self.mem_type, self.space, self.scope + self.addr_type, self.mem_type, self.space, self.order ) } } @@ -3044,7 +3043,6 @@ pub struct OpSuLd { pub image_dim: ImageDim, pub mem_order: MemOrder, - pub mem_scope: MemScope, pub mask: u8, #[src_type(GPR)] @@ -3058,10 +3056,9 @@ impl fmt::Display for OpSuLd { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "SULD.P.{}.{}.{} {{ {} {} }} [{}] {}", + "SULD.P.{}.{} {{ {} {} }} [{}] {}", self.image_dim, self.mem_order, - self.mem_scope, self.dst, self.resident, self.coord, @@ -3075,7 +3072,6 @@ impl fmt::Display for OpSuLd { pub struct OpSuSt { pub image_dim: ImageDim, pub mem_order: MemOrder, - pub mem_scope: MemScope, pub mask: u8, #[src_type(GPR)] @@ -3092,13 +3088,8 @@ impl fmt::Display for OpSuSt { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "SUST.P.{}.{}.{} [{}] {} {}", - self.image_dim, - self.mem_order, - self.mem_scope, - self.coord, - self.data, - self.handle, + "SUST.P.{}.{} [{}] {} {}", + self.image_dim, self.mem_order, self.coord, self.data, self.handle, ) } } @@ -3115,7 +3106,6 @@ pub struct OpSuAtom { pub atom_type: AtomType, pub mem_order: MemOrder, - pub mem_scope: MemScope, #[src_type(GPR)] pub handle: Src, @@ -3131,12 +3121,11 @@ impl fmt::Display for OpSuAtom { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "SUATOM.P.{}.{}.{}.{}.{} [{}] {} {}", + "SUATOM.P.{}.{}.{}.{} [{}] {} {}", self.image_dim, self.atom_op, self.atom_type, self.mem_order, - self.mem_scope, self.coord, self.data, self.handle, @@ -3239,19 +3228,14 @@ pub struct OpAtom { pub mem_space: MemSpace, pub mem_order: MemOrder, - pub mem_scope: MemScope, } impl fmt::Display for OpAtom { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "ATOM.{}.{}.{}.{} {}", - self.atom_op, - self.atom_type, - self.mem_order, - self.mem_scope, - self.dst + "ATOM.{}.{}.{} {}", + self.atom_op, self.atom_type, self.mem_order, self.dst )?; write!(f, " [")?; if !self.addr.is_zero() { @@ -3288,15 +3272,14 @@ pub struct OpAtomCas { pub mem_space: MemSpace, pub mem_order: MemOrder, - pub mem_scope: MemScope, } impl fmt::Display for OpAtomCas { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "ATOM.CAS.{}.{}.{} {}", - self.atom_type, self.mem_order, self.mem_scope, self.dst + "ATOM.CAS.{}.{} {}", + self.atom_type, self.mem_order, self.dst )?; write!(f, " [")?; if !self.addr.is_zero() { diff --git a/src/nouveau/compiler/nak_lower_copy_swap.rs b/src/nouveau/compiler/nak_lower_copy_swap.rs index 1db1c8dffad..6eba047997a 100644 --- a/src/nouveau/compiler/nak_lower_copy_swap.rs +++ b/src/nouveau/compiler/nak_lower_copy_swap.rs @@ -48,8 +48,7 @@ impl LowerCopySwap { addr_type: MemAddrType::A32, mem_type: MemType::B32, space: MemSpace::Local, - order: MemOrder::Strong, - scope: MemScope::CTA, + order: MemOrder::Strong(MemScope::CTA), }; let addr = self.tls_start + src_reg.base_idx() * 4; self.tls_size = max(self.tls_size, addr + 4); @@ -104,8 +103,7 @@ impl LowerCopySwap { addr_type: MemAddrType::A32, mem_type: MemType::B32, space: MemSpace::Local, - order: MemOrder::Strong, - scope: MemScope::CTA, + order: MemOrder::Strong(MemScope::CTA), }; let addr = self.tls_start + dst_reg.base_idx() * 4; self.tls_size = max(self.tls_size, addr + 4);