nak: Specify MemScope on MemOrder::Strong

The scope isn't relevant on MemOrder::Weak. In fact, ptxas refuses
any input that specifies a scope on weak loads. So, carry the scope
inside MemOrder::Strong, so that a weak access with a scope is not
representable in our IR.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
M Henning 2023-10-20 22:13:27 -04:00 committed by Marge Bot
parent 94ea8a5883
commit dfb6260483
4 changed files with 49 additions and 83 deletions

View file

@ -1137,8 +1137,12 @@ impl SM75Instr {
);
}
fn set_mem_order_scope(&mut self, order: &MemOrder, scope: &MemScope) {
fn set_mem_order(&mut self, order: &MemOrder) {
if self.sm < 80 {
let scope = match order {
MemOrder::Weak => MemScope::CTA,
MemOrder::Strong(s) => *s,
};
self.set_field(
77..79,
match scope {
@ -1153,18 +1157,18 @@ impl SM75Instr {
match order {
/* Constant => 0_u8, */
MemOrder::Weak => 1_u8,
MemOrder::Strong => 2_u8,
MemOrder::Strong(_) => 2_u8,
/* MMIO => 3_u8, */
},
);
} else {
self.set_field(
77..81,
match (order, scope) {
(MemOrder::Weak, _) => 0x0_u8,
(MemOrder::Strong, MemScope::CTA) => 0x5_u8,
(MemOrder::Strong, MemScope::GPU) => 0x7_u8,
(MemOrder::Strong, MemScope::System) => 0xa_u8,
match order {
MemOrder::Weak => 0x0_u8,
MemOrder::Strong(MemScope::CTA) => 0x5_u8,
MemOrder::Strong(MemScope::GPU) => 0x7_u8,
MemOrder::Strong(MemScope::System) => 0xa_u8,
},
);
}
@ -1179,7 +1183,7 @@ impl SM75Instr {
self.set_pred_dst(81..84, op.resident);
self.set_image_dim(61..64, op.image_dim);
self.set_mem_order_scope(&op.mem_order, &op.mem_scope);
self.set_mem_order(&op.mem_order);
assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
self.set_field(72..76, op.mask);
@ -1193,7 +1197,7 @@ impl SM75Instr {
self.set_reg_src(64..72, op.handle);
self.set_image_dim(61..64, op.image_dim);
self.set_mem_order_scope(&op.mem_order, &op.mem_scope);
self.set_mem_order(&op.mem_order);
assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
self.set_field(72..76, op.mask);
@ -1213,7 +1217,7 @@ impl SM75Instr {
self.set_pred_dst(81..84, op.resident);
self.set_image_dim(61..64, op.image_dim);
self.set_mem_order_scope(&op.mem_order, &op.mem_scope);
self.set_mem_order(&op.mem_order);
self.set_bit(72, false); /* .BA */
self.set_atom_type(73..76, op.atom_type);
@ -1245,7 +1249,7 @@ impl SM75Instr {
},
);
self.set_mem_type(73..76, access.mem_type);
self.set_mem_order_scope(&access.order, &access.scope);
self.set_mem_order(&access.order);
}
fn encode_ldg(&mut self, op: &OpLd) {
@ -1268,8 +1272,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong);
assert!(op.access.scope == MemScope::CTA);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
}
fn encode_lds(&mut self, op: &OpLd) {
@ -1281,8 +1284,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong);
assert!(op.access.scope == MemScope::CTA);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
self.set_bit(87, false); /* !.ZD - Returns a predicate? */
}
@ -1328,8 +1330,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong);
assert!(op.access.scope == MemScope::CTA);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
}
fn encode_sts(&mut self, op: &OpSt) {
@ -1341,8 +1342,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong);
assert!(op.access.scope == MemScope::CTA);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
}
fn encode_st(&mut self, op: &OpSt) {
@ -1406,7 +1406,7 @@ impl SM75Instr {
);
self.set_atom_type(73..76, op.atom_type);
self.set_mem_order_scope(&op.mem_order, &op.mem_scope);
self.set_mem_order(&op.mem_order);
self.set_atom_op(87..91, op.atom_op);
}
@ -1430,7 +1430,7 @@ impl SM75Instr {
);
self.set_atom_type(73..76, op.atom_type);
self.set_mem_order_scope(&op.mem_order, &op.mem_scope);
self.set_mem_order(&op.mem_order);
}
fn encode_atoms(&mut self, op: &OpAtom) {
@ -1443,8 +1443,7 @@ impl SM75Instr {
self.set_field(40..64, op.addr_offset);
assert!(op.addr_type == MemAddrType::A32);
assert!(op.mem_order == MemOrder::Strong);
assert!(op.mem_scope == MemScope::CTA);
assert!(op.mem_order == MemOrder::Strong(MemScope::CTA));
self.set_atom_type(73..76, op.atom_type);
self.set_atom_op(87..91, op.atom_op);
@ -1461,8 +1460,7 @@ impl SM75Instr {
self.set_reg_src(64..72, op.data);
assert!(op.addr_type == MemAddrType::A32);
assert!(op.mem_order == MemOrder::Strong);
assert!(op.mem_scope == MemScope::CTA);
assert!(op.mem_order == MemOrder::Strong(MemScope::CTA));
self.set_atom_type(73..76, op.atom_type);
}

View file

@ -1432,8 +1432,7 @@ impl<'a> ShaderFromNir<'a> {
atom_op: atom_op,
atom_type: atom_type,
image_dim: dim,
mem_order: MemOrder::Strong,
mem_scope: MemScope::System,
mem_order: MemOrder::Strong(MemScope::System),
});
self.set_dst(&intrin.def, dst);
}
@ -1453,8 +1452,7 @@ impl<'a> ShaderFromNir<'a> {
dst: dst.into(),
resident: Dst::None,
image_dim: dim,
mem_order: MemOrder::Strong,
mem_scope: MemScope::System,
mem_order: MemOrder::Strong(MemScope::System),
mask: (1 << comps) - 1,
handle: handle,
coord: coord,
@ -1474,8 +1472,7 @@ impl<'a> ShaderFromNir<'a> {
b.push_op(OpSuSt {
image_dim: dim,
mem_order: MemOrder::Strong,
mem_scope: MemScope::System,
mem_order: MemOrder::Strong(MemScope::System),
mask: (1 << comps) - 1,
handle: handle,
coord: coord,
@ -1530,8 +1527,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A64,
addr_offset: offset,
mem_space: MemSpace::Global,
mem_order: MemOrder::Strong,
mem_scope: MemScope::System,
mem_order: MemOrder::Strong(MemScope::System),
});
self.set_dst(&intrin.def, dst);
}
@ -1555,8 +1551,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A64,
addr_offset: offset,
mem_space: MemSpace::Global,
mem_order: MemOrder::Strong,
mem_scope: MemScope::System,
mem_order: MemOrder::Strong(MemScope::System),
});
self.set_dst(&intrin.def, dst);
}
@ -1580,8 +1575,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A64,
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Global,
order: MemOrder::Strong,
scope: MemScope::System,
order: MemOrder::Strong(MemScope::System),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 32);
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
@ -1761,8 +1755,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A32,
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Local,
order: MemOrder::Strong,
scope: MemScope::CTA,
order: MemOrder::Strong(MemScope::CTA),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
@ -1783,8 +1776,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A32,
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Shared,
order: MemOrder::Strong,
scope: MemScope::CTA,
order: MemOrder::Strong(MemScope::CTA),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let offset = offset + intrin.base();
@ -1941,8 +1933,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A32,
addr_offset: offset,
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong,
mem_scope: MemScope::CTA,
mem_order: MemOrder::Strong(MemScope::CTA),
});
self.set_dst(&intrin.def, dst);
}
@ -1966,8 +1957,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A32,
addr_offset: offset,
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong,
mem_scope: MemScope::CTA,
mem_order: MemOrder::Strong(MemScope::CTA),
});
self.set_dst(&intrin.def, dst);
}
@ -1980,8 +1970,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A64,
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Global,
order: MemOrder::Strong,
scope: MemScope::System,
order: MemOrder::Strong(MemScope::System),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 32);
@ -2017,8 +2006,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A32,
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Local,
order: MemOrder::Strong,
scope: MemScope::CTA,
order: MemOrder::Strong(MemScope::CTA),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
@ -2038,8 +2026,7 @@ impl<'a> ShaderFromNir<'a> {
addr_type: MemAddrType::A32,
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Shared,
order: MemOrder::Strong,
scope: MemScope::CTA,
order: MemOrder::Strong(MemScope::CTA),
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let offset = offset + intrin.base();

View file

@ -1749,14 +1749,14 @@ impl fmt::Display for MemType {
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemOrder {
Weak,
Strong,
Strong(MemScope),
}
impl fmt::Display for MemOrder {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MemOrder::Weak => write!(f, "WEAK"),
MemOrder::Strong => write!(f, "STRONG"),
MemOrder::Strong(scope) => write!(f, "STRONG.{}", scope),
}
}
}
@ -1801,7 +1801,6 @@ pub struct MemAccess {
pub mem_type: MemType,
pub space: MemSpace,
pub order: MemOrder,
pub scope: MemScope,
}
impl fmt::Display for MemAccess {
@ -1809,7 +1808,7 @@ impl fmt::Display for MemAccess {
write!(
f,
"{}.{}.{}.{}",
self.addr_type, self.mem_type, self.space, self.scope
self.addr_type, self.mem_type, self.space, self.order
)
}
}
@ -3044,7 +3043,6 @@ pub struct OpSuLd {
pub image_dim: ImageDim,
pub mem_order: MemOrder,
pub mem_scope: MemScope,
pub mask: u8,
#[src_type(GPR)]
@ -3058,10 +3056,9 @@ impl fmt::Display for OpSuLd {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"SULD.P.{}.{}.{} {{ {} {} }} [{}] {}",
"SULD.P.{}.{} {{ {} {} }} [{}] {}",
self.image_dim,
self.mem_order,
self.mem_scope,
self.dst,
self.resident,
self.coord,
@ -3075,7 +3072,6 @@ impl fmt::Display for OpSuLd {
pub struct OpSuSt {
pub image_dim: ImageDim,
pub mem_order: MemOrder,
pub mem_scope: MemScope,
pub mask: u8,
#[src_type(GPR)]
@ -3092,13 +3088,8 @@ impl fmt::Display for OpSuSt {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"SUST.P.{}.{}.{} [{}] {} {}",
self.image_dim,
self.mem_order,
self.mem_scope,
self.coord,
self.data,
self.handle,
"SUST.P.{}.{} [{}] {} {}",
self.image_dim, self.mem_order, self.coord, self.data, self.handle,
)
}
}
@ -3115,7 +3106,6 @@ pub struct OpSuAtom {
pub atom_type: AtomType,
pub mem_order: MemOrder,
pub mem_scope: MemScope,
#[src_type(GPR)]
pub handle: Src,
@ -3131,12 +3121,11 @@ impl fmt::Display for OpSuAtom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"SUATOM.P.{}.{}.{}.{}.{} [{}] {} {}",
"SUATOM.P.{}.{}.{}.{} [{}] {} {}",
self.image_dim,
self.atom_op,
self.atom_type,
self.mem_order,
self.mem_scope,
self.coord,
self.data,
self.handle,
@ -3239,19 +3228,14 @@ pub struct OpAtom {
pub mem_space: MemSpace,
pub mem_order: MemOrder,
pub mem_scope: MemScope,
}
impl fmt::Display for OpAtom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"ATOM.{}.{}.{}.{} {}",
self.atom_op,
self.atom_type,
self.mem_order,
self.mem_scope,
self.dst
"ATOM.{}.{}.{} {}",
self.atom_op, self.atom_type, self.mem_order, self.dst
)?;
write!(f, " [")?;
if !self.addr.is_zero() {
@ -3288,15 +3272,14 @@ pub struct OpAtomCas {
pub mem_space: MemSpace,
pub mem_order: MemOrder,
pub mem_scope: MemScope,
}
impl fmt::Display for OpAtomCas {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"ATOM.CAS.{}.{}.{} {}",
self.atom_type, self.mem_order, self.mem_scope, self.dst
"ATOM.CAS.{}.{} {}",
self.atom_type, self.mem_order, self.dst
)?;
write!(f, " [")?;
if !self.addr.is_zero() {

View file

@ -48,8 +48,7 @@ impl LowerCopySwap {
addr_type: MemAddrType::A32,
mem_type: MemType::B32,
space: MemSpace::Local,
order: MemOrder::Strong,
scope: MemScope::CTA,
order: MemOrder::Strong(MemScope::CTA),
};
let addr = self.tls_start + src_reg.base_idx() * 4;
self.tls_size = max(self.tls_size, addr + 4);
@ -104,8 +103,7 @@ impl LowerCopySwap {
addr_type: MemAddrType::A32,
mem_type: MemType::B32,
space: MemSpace::Local,
order: MemOrder::Strong,
scope: MemScope::CTA,
order: MemOrder::Strong(MemScope::CTA),
};
let addr = self.tls_start + dst_reg.base_idx() * 4;
self.tls_size = max(self.tls_size, addr + 4);