From 264ff0528cc18f8d6cd00905c56ff5dbf0f03704 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Tue, 15 Jul 2025 16:43:58 -0400
Subject: [PATCH] nak: Wire up the mma predicate on Hopper+

Fixes: 90438bae51bc ("nir: Add NVIDIA-specific muladd intrinsics")
Part-of:
(cherry picked from commit 4bb67cacbac4311ece87fcac55bbc0d5120c3f9f)
---
Reviewer notes (text between the "---" line and the first "diff --git"
line is not part of the commit message and is ignored by `git am`):

  * Adds SM70Encoder::set_rev_upred_src(), which takes a 3-bit field range,
    a negate-bit position and a source that must be True, False or a
    single-component UPred register with base index <= 7.  The register is
    written to the field as `7 - base_idx` (the encoding is reversed), and
    the negate bit is set to `not ^ src_mod_is_bnot(src.src_mod)`.
    SrcRef::False is encoded as the negated true register.
  * The OpImma and OpHmma encoders call it with a constant-true source on
    bits 87..90 (negate bit 90) when `e.sm >= 90`.

 .pick_status.json                       |  2 +-
 src/nouveau/compiler/nak/sm70_encode.rs | 35 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index 6e9088ddf22..ea18e02daef 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -3284,7 +3284,7 @@
         "description": "nak: Wire up the mma predicate on Hopper+",
         "nominated": true,
         "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "90438bae51bc3a29bf1dfdb63e84a498104fc790",
         "notes": null
diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs
index 58acf5b4928..23c2ced9c76 100644
--- a/src/nouveau/compiler/nak/sm70_encode.rs
+++ b/src/nouveau/compiler/nak/sm70_encode.rs
@@ -149,6 +149,33 @@ impl SM70Encoder<'_> {
         self.set_pred_src_file(range, not_bit, src, RegFile::UPred);
     }
 
+    fn set_rev_upred_src(
+        &mut self,
+        range: Range<usize>,
+        not_bit: usize,
+        src: &Src,
+    ) {
+        let file = RegFile::UPred;
+        let (not, reg) = match src.src_ref {
+            SrcRef::True => (false, self.true_reg(file)),
+            SrcRef::False => (true, self.true_reg(file)),
+            SrcRef::Reg(reg) => {
+                assert!(reg.file() == file);
+                (false, reg)
+            }
+            _ => panic!("Not a register"),
+        };
+
+        assert!(range.len() == 3);
+        assert!(reg.base_idx() <= 7);
+        assert!(reg.comps() == 1);
+
+        // These sources are funky.  They're encoded backwards.
+        self.set_field(range, 7 - reg.base_idx());
+
+        self.set_bit(not_bit, not ^ src_mod_is_bnot(src.src_mod));
+    }
+
     fn set_src_cb(&mut self, range: Range<usize>, cx_bit: usize, cb: &CBufRef) {
         let mut v = BitMutView::new_subset(self, range);
         v.set_field(6..22, cb.offset);
@@ -3847,6 +3874,10 @@ impl SM70Op for OpImma {
         e.set_reg_src(64..72, &self.srcs[2]);
         e.set_bit(74, true); // SRC1.COL
 
+        if e.sm >= 90 {
+            e.set_rev_upred_src(87..90, 90, &true.into());
+        }
+
         assert!(self.mat_size == ImmaSize::M8N8K16 || e.sm >= 80);
         e.set_field2(
             75..76,
@@ -3897,6 +3928,10 @@ impl SM70Op for OpHmma {
         e.set_reg_src(32..40, &self.srcs[1]);
         e.set_reg_src(64..72, &self.srcs[2]);
 
+        if e.sm >= 90 {
+            e.set_rev_upred_src(87..90, 90, &true.into());
+        }
+
         assert!(self.mat_size != HmmaSize::M16N8K4 || e.sm >= 80);
         e.set_field2(
             75..76,