mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
nak/sm70: Properly encode ldc on Blackwell+
Also add nvdisasm tests for ldc, because the instruction is important and its encoding has many subtle per-SM differences.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34910>
This commit is contained in:
parent
0b142182cb
commit
8c3ebddba3
3 changed files with 140 additions and 9 deletions
|
|
@ -5138,7 +5138,7 @@ impl fmt::Display for LdcMode {
|
|||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpLdc {
|
||||
pub dst: Dst,
|
||||
|
||||
|
|
|
|||
|
|
@ -169,6 +169,101 @@ pub fn test_nop() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Disassembler check for `OpLdc` on every SM in `SM_LIST`.
///
/// For each SM, an `OpLdc` is pushed for every exercised combination of
/// destination register file, constant-buffer kind, and memory type, paired
/// with the disassembly text it is expected to produce. The whole batch is
/// then verified with `DisasmCheck::check`.
#[test]
pub fn test_ldc() {
    // Two-component UGPR range starting at index 2, used as the bindless handle.
    let bindless_handle = RegRef::new(RegFile::UGPR, 2, 2);
    let cbufs = [
        (CBuf::Binding(5), "c[0x5]"),
        (CBuf::BindlessUGPR(bindless_handle), "cx[ur2]"),
    ];

    // Each memory type together with the suffix expected in the disassembly.
    let mem_types = [
        (MemType::U8, ".u8"),
        (MemType::I8, ".s8"),
        (MemType::U16, ".u16"),
        (MemType::I16, ".s16"),
        (MemType::B32, ""),
        (MemType::B64, ".64"),
        (MemType::B128, ".128"),
    ];

    for sm in SM_LIST {
        let mut checker = DisasmCheck::new();

        for reg_file in [RegFile::GPR, RegFile::UGPR] {
            // Uniform destinations are only exercised on SM 73 and later.
            if sm < 73 && reg_file == RegFile::UGPR {
                continue;
            }

            // The uniform mnemonic expected from the disassembler changes at SM 100.
            let ldc_op_str = match reg_file {
                RegFile::GPR => "ldc",
                RegFile::UGPR if sm >= 100 => "ldcu",
                RegFile::UGPR => "uldc",
                _ => panic!("Unsupported register file"),
            };
            let r4_str = format!("{}4", reg_file.fmt_prefix());

            // 128-bit loads are only exercised for non-GPR destinations on SM 100+.
            let allow_b128 = reg_file != RegFile::GPR && sm >= 100;

            for (cbuf, cbuf_str) in cbufs.iter() {
                // Bindless UGPR handles are only exercised on SM 73 and later.
                if sm < 73 && matches!(cbuf, CBuf::BindlessUGPR(_)) {
                    continue;
                }

                // A register offset is tested for GPR destinations, for bound
                // cbufs on SM 100+, and for everything on SM 120+.
                let is_bound = matches!(cbuf, CBuf::Binding(_));
                let allow_reg_offset =
                    reg_file == RegFile::GPR || (sm >= 100 && is_bound) || sm >= 120;

                for (mem_type, mem_type_str) in mem_types {
                    if mem_type == MemType::B128 && !allow_b128 {
                        continue;
                    }

                    // One 32-bit destination register per started 32 bits of data.
                    let dst_reg = RegRef::new(
                        reg_file,
                        4,
                        mem_type.bits().div_ceil(32) as u8,
                    );

                    let mut ldc = OpLdc {
                        dst: dst_reg.into(),
                        cb: CBufRef {
                            buf: cbuf.clone(),
                            offset: 0x248,
                        }
                        .into(),
                        offset: 0.into(),
                        mode: LdcMode::Indexed,
                        mem_type,
                    };

                    // First the immediate-offset-only form...
                    checker.push(
                        ldc.clone(),
                        format!(
                            "{ldc_op_str}{mem_type_str} {r4_str}, {cbuf_str}[0x248];"
                        ),
                    );

                    if !allow_reg_offset {
                        continue;
                    }

                    // ...then the same load with a register offset added.
                    let r6 = RegRef::new(reg_file, 6, 1);
                    ldc.offset = r6.into();
                    checker.push(
                        ldc,
                        format!(
                            "{ldc_op_str}{mem_type_str} {r4_str}, {cbuf_str}[{r6}+0x248];"
                        ),
                    );
                }
            }
        }

        checker.check(sm);
    }
}
|
||||
|
||||
#[test]
|
||||
pub fn test_ld_st_atom() {
|
||||
let r0 = RegRef::new(RegFile::GPR, 0, 1);
|
||||
|
|
|
|||
|
|
@ -105,6 +105,15 @@ impl SM70Encoder<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
fn set_ureg_src(&mut self, range: Range<usize>, src: &Src) {
|
||||
assert!(src.src_mod.is_none());
|
||||
match src.src_ref {
|
||||
SrcRef::Zero => self.set_ureg(range, self.zero_reg(RegFile::UGPR)),
|
||||
SrcRef::Reg(reg) => self.set_ureg(range, reg),
|
||||
_ => panic!("Not a register"),
|
||||
}
|
||||
}
|
||||
|
||||
fn set_pred_dst(&mut self, range: Range<usize>, dst: &Dst) {
|
||||
match dst {
|
||||
Dst::None => self.set_pred_reg(range, self.true_reg(RegFile::Pred)),
|
||||
|
|
@ -2875,10 +2884,16 @@ impl SM70Op for OpLdc {
|
|||
match cb.buf {
|
||||
CBuf::Binding(idx) => {
|
||||
if self.is_uniform() {
|
||||
e.set_opcode(0xab9);
|
||||
if e.sm >= 100 {
|
||||
e.set_opcode(0x7ac);
|
||||
e.set_bit(91, true);
|
||||
e.set_ureg_src(24..32, &self.offset);
|
||||
} else {
|
||||
e.set_opcode(0xab9);
|
||||
e.set_bit(91, false);
|
||||
assert!(self.offset.is_zero());
|
||||
}
|
||||
e.set_udst(&self.dst);
|
||||
|
||||
assert!(self.offset.is_zero());
|
||||
assert!(self.mode == LdcMode::Indexed);
|
||||
} else {
|
||||
e.set_opcode(0xb82);
|
||||
|
|
@ -2894,16 +2909,28 @@ impl SM70Op for OpLdc {
|
|||
LdcMode::IndexedSegmentedLinear => 3_u8,
|
||||
},
|
||||
);
|
||||
e.set_bit(91, false); // Bound
|
||||
}
|
||||
e.set_field(54..59, idx);
|
||||
e.set_bit(91, false); // Bound
|
||||
}
|
||||
CBuf::BindlessUGPR(handle) => {
|
||||
if self.is_uniform() {
|
||||
e.set_opcode(0xab9);
|
||||
if e.sm >= 100 {
|
||||
e.set_opcode(0xbac);
|
||||
} else {
|
||||
e.set_opcode(0xab9);
|
||||
}
|
||||
e.set_udst(&self.dst);
|
||||
|
||||
assert!(self.offset.is_zero());
|
||||
if e.sm >= 120 {
|
||||
e.set_ureg_src(64..72, &self.offset);
|
||||
} else if e.sm >= 100 {
|
||||
// Blackwell A adds the source but it has to be zero
|
||||
assert!(self.offset.is_zero());
|
||||
e.set_ureg_src(64..72, &self.offset);
|
||||
} else {
|
||||
assert!(self.offset.is_zero());
|
||||
}
|
||||
} else {
|
||||
e.set_opcode(0x582);
|
||||
e.set_dst(&self.dst);
|
||||
|
|
@ -2912,15 +2939,24 @@ impl SM70Op for OpLdc {
|
|||
}
|
||||
|
||||
e.set_ureg(24..32, handle);
|
||||
e.set_reg_src(64..72, &self.offset);
|
||||
assert!(self.mode == LdcMode::Indexed);
|
||||
e.set_bit(91, true); // Bindless
|
||||
}
|
||||
CBuf::BindlessSSA(_) => panic!("SSA values must be lowered"),
|
||||
}
|
||||
|
||||
e.set_field(38..54, cb.offset);
|
||||
if e.sm >= 100 && self.is_uniform() {
|
||||
e.set_field(37..54, cb.offset);
|
||||
} else {
|
||||
e.set_field(38..54, cb.offset);
|
||||
}
|
||||
e.set_mem_type(73..76, self.mem_type);
|
||||
|
||||
if e.sm >= 120 {
|
||||
e.set_field(80..82, 0_u8); // tex/hdr_unpack
|
||||
} else if e.sm >= 100 {
|
||||
e.set_bit(80, false); // tex_unpack
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue