nak/sm70: Properly encode ldc on Blackwell+

Also add nvdisasm tests for ldc because it's pretty important and has
lots of subtle per-SM differences.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34910>
This commit is contained in:
Faith Ekstrand 2025-05-09 11:31:21 -04:00 committed by Marge Bot
parent 0b142182cb
commit 8c3ebddba3
3 changed files with 140 additions and 9 deletions

View file

@ -5138,7 +5138,7 @@ impl fmt::Display for LdcMode {
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpLdc {
pub dst: Dst,

View file

@ -169,6 +169,101 @@ pub fn test_nop() {
}
}
/// Exercises `OpLdc` for every SM in `SM_LIST`, pairing each instruction
/// with the disassembly text this test expects for it.
///
/// The sweep covers both destination register files, a bound and a
/// UGPR-bindless constant buffer, and every memory type in the table below,
/// skipping the combinations the test does not exercise on a given SM.
#[test]
pub fn test_ldc() {
    let dst_files = [RegFile::GPR, RegFile::UGPR];
    let bindless_handle = RegRef::new(RegFile::UGPR, 2, 2);
    // Each constant buffer is paired with its expected disassembly prefix.
    let cbuf_cases = [
        (CBuf::Binding(5), "c[0x5]"),
        (CBuf::BindlessUGPR(bindless_handle), "cx[ur2]"),
    ];
    // Each memory type is paired with its expected mnemonic suffix.
    let mem_type_cases = [
        (MemType::U8, ".u8"),
        (MemType::I8, ".s8"),
        (MemType::U16, ".u16"),
        (MemType::I16, ".s16"),
        (MemType::B32, ""),
        (MemType::B64, ".64"),
        (MemType::B128, ".128"),
    ];

    for sm in SM_LIST {
        let mut checker = DisasmCheck::new();

        for reg_file in dst_files {
            // Uniform destinations are only exercised from SM 73 onwards.
            if sm < 73 && reg_file == RegFile::UGPR {
                continue;
            }

            // The uniform mnemonic is spelled differently from SM 100 on.
            let ldc_op_str = match reg_file {
                RegFile::GPR => "ldc",
                RegFile::UGPR if sm >= 100 => "ldcu",
                RegFile::UGPR => "uldc",
                _ => panic!("Unsupported register file"),
            };
            let r4_str = format!("{}4", reg_file.fmt_prefix());
            let gpr_dst = reg_file == RegFile::GPR;
            // 128-bit loads are only exercised for non-GPR destinations
            // on SM 100 and later.
            let b128_allowed = !gpr_dst && sm >= 100;

            for (cbuf, cbuf_str) in &cbuf_cases {
                // UGPR-bindless handles are only exercised from SM 73 on.
                if sm < 73 && matches!(cbuf, CBuf::BindlessUGPR(_)) {
                    continue;
                }

                for (mem_type, mem_type_str) in mem_type_cases {
                    if mem_type == MemType::B128 && !b128_allowed {
                        continue;
                    }

                    // One 32-bit destination register per 32 bits loaded,
                    // rounded up.
                    let r4 = RegRef::new(
                        reg_file,
                        4,
                        mem_type.bits().div_ceil(32) as u8,
                    );

                    // First form: immediate offset only.
                    let imm_only = OpLdc {
                        dst: r4.into(),
                        cb: CBufRef {
                            buf: cbuf.clone(),
                            offset: 0x248,
                        }
                        .into(),
                        offset: 0.into(),
                        mode: LdcMode::Indexed,
                        mem_type,
                    };
                    checker.push(
                        imm_only.clone(),
                        format!(
                            "{ldc_op_str}{mem_type_str} {r4_str}, {cbuf_str}[0x248];"
                        ),
                    );

                    // Second form: register + immediate offset, only for
                    // the combinations listed here.
                    let takes_reg_offset = gpr_dst
                        || (sm >= 100 && matches!(cbuf, CBuf::Binding(_)))
                        || sm >= 120;
                    if !takes_reg_offset {
                        continue;
                    }

                    let r6 = RegRef::new(reg_file, 6, 1);
                    checker.push(
                        OpLdc {
                            offset: r6.into(),
                            ..imm_only
                        },
                        format!(
                            "{ldc_op_str}{mem_type_str} {r4_str}, {cbuf_str}[{r6}+0x248];"
                        ),
                    );
                }
            }
        }

        checker.check(sm);
    }
}
#[test]
pub fn test_ld_st_atom() {
let r0 = RegRef::new(RegFile::GPR, 0, 1);

View file

@ -105,6 +105,15 @@ impl SM70Encoder<'_> {
}
}
/// Encodes `src` into the bit `range` as a uniform register.
///
/// `SrcRef::Zero` is encoded as the UGPR zero register and `SrcRef::Reg`
/// as the register itself.
///
/// # Panics
///
/// Panics if `src` carries a source modifier or is neither `SrcRef::Zero`
/// nor `SrcRef::Reg`.
fn set_ureg_src(&mut self, range: Range<usize>, src: &Src) {
    assert!(src.src_mod.is_none());
    // Resolve the register first so there is a single `set_ureg` call.
    let ureg = match src.src_ref {
        SrcRef::Zero => self.zero_reg(RegFile::UGPR),
        SrcRef::Reg(reg) => reg,
        _ => panic!("Not a register"),
    };
    self.set_ureg(range, ureg);
}
fn set_pred_dst(&mut self, range: Range<usize>, dst: &Dst) {
match dst {
Dst::None => self.set_pred_reg(range, self.true_reg(RegFile::Pred)),
@ -2875,10 +2884,16 @@ impl SM70Op for OpLdc {
match cb.buf {
CBuf::Binding(idx) => {
if self.is_uniform() {
e.set_opcode(0xab9);
if e.sm >= 100 {
e.set_opcode(0x7ac);
e.set_bit(91, true);
e.set_ureg_src(24..32, &self.offset);
} else {
e.set_opcode(0xab9);
e.set_bit(91, false);
assert!(self.offset.is_zero());
}
e.set_udst(&self.dst);
assert!(self.offset.is_zero());
assert!(self.mode == LdcMode::Indexed);
} else {
e.set_opcode(0xb82);
@ -2894,16 +2909,28 @@ impl SM70Op for OpLdc {
LdcMode::IndexedSegmentedLinear => 3_u8,
},
);
e.set_bit(91, false); // Bound
}
e.set_field(54..59, idx);
e.set_bit(91, false); // Bound
}
CBuf::BindlessUGPR(handle) => {
if self.is_uniform() {
e.set_opcode(0xab9);
if e.sm >= 100 {
e.set_opcode(0xbac);
} else {
e.set_opcode(0xab9);
}
e.set_udst(&self.dst);
assert!(self.offset.is_zero());
if e.sm >= 120 {
e.set_ureg_src(64..72, &self.offset);
} else if e.sm >= 100 {
// Blackwell A adds the source but it has to be zero
assert!(self.offset.is_zero());
e.set_ureg_src(64..72, &self.offset);
} else {
assert!(self.offset.is_zero());
}
} else {
e.set_opcode(0x582);
e.set_dst(&self.dst);
@ -2912,15 +2939,24 @@ impl SM70Op for OpLdc {
}
e.set_ureg(24..32, handle);
e.set_reg_src(64..72, &self.offset);
assert!(self.mode == LdcMode::Indexed);
e.set_bit(91, true); // Bindless
}
CBuf::BindlessSSA(_) => panic!("SSA values must be lowered"),
}
e.set_field(38..54, cb.offset);
if e.sm >= 100 && self.is_uniform() {
e.set_field(37..54, cb.offset);
} else {
e.set_field(38..54, cb.offset);
}
e.set_mem_type(73..76, self.mem_type);
if e.sm >= 120 {
e.set_field(80..82, 0_u8); // tex/hdr_unpack
} else if e.sm >= 100 {
e.set_bit(80, false); // tex_unpack
}
}
}