From 9f04a69b474a152a22f938e8ba748542d8a13bfb Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Thu, 5 Feb 2026 09:21:26 +0100 Subject: [PATCH 1/5] nir: Add isbewr_nv intrinsic and extends isberd_nv Adds a new intrinsic allowing to do raw write in the various ISBE spaces where attributes are stored. This also adapt isberd_nv to map to what we have since SM70+. This will be used to support mesh shaders. Signed-off-by: Mary Guillemard --- src/compiler/nir/nir_divergence_analysis.c | 1 + src/compiler/nir/nir_intrinsics.py | 11 +++++++++-- src/nouveau/compiler/nak/from_nir.rs | 13 +++++++++++++ src/nouveau/compiler/nak_nir_lower_vtg_io.c | 12 +++++++++++- src/nouveau/compiler/nak_private.h | 15 +++++++++++++++ 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index feafb307088..5ca93209ad4 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -994,6 +994,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_dpas_intel: case nir_intrinsic_convert_cmat_intel: case nir_intrinsic_isberd_nv: + case nir_intrinsic_isbewr_nv: case nir_intrinsic_vild_nv: case nir_intrinsic_al2p_nv: case nir_intrinsic_ald_nv: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 374ff0e97c8..d3e0fa6223a 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2768,8 +2768,15 @@ intrinsic("ldcx_nv", dest_comp=0, src_comp=[1, 1], flags=[CAN_ELIMINATE, CAN_REORDER]) intrinsic("load_sysval_nv", dest_comp=1, src_comp=[], bit_sizes=[32, 64], indices=[ACCESS, BASE, DIVERGENT], flags=[CAN_ELIMINATE]) -intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[32], - flags=[CAN_ELIMINATE, CAN_REORDER]) +# src[] = { offset }. 
+# FLAGS is struct nak_nir_isbe_flags +intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[8, 16, 32], + indices=[RANGE_BASE, RANGE, FLAGS, ACCESS], + flags=[CAN_ELIMINATE]) +# src[] = { data, offset }. +# FLAGS is struct nak_nir_isbe_flags +intrinsic("isbewr_nv", src_comp=[0, 1], + indices=[RANGE_BASE, RANGE, FLAGS, ACCESS], flags=[]) intrinsic("vild_nv", dest_comp=1, src_comp=[1], bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER], indices=[BASE]) diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index bdec2e0fb58..8833aac94a6 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -3033,6 +3033,19 @@ impl<'a> ShaderFromNir<'a> { self.set_dst(&intrin.def, dst.into()); } nir_intrinsic_isberd_nv => { + let flags = intrin.flags(); + let flags: nak_nir_isbe_flags = + unsafe { std::mem::transmute_copy(&flags) }; + + // TODO: Implement 16 and 32 bits in ISBERD + assert!( + intrin.def.bit_size() == 8 + && intrin.def.num_components == 1 + ); + + // TODO: Implement mode in ISBERD + assert!(flags.mode() == NAK_ISBE_MODE_MAP); + let dst = b.alloc_ssa(RegFile::GPR); b.push_op(OpIsberd { dst: dst.into(), diff --git a/src/nouveau/compiler/nak_nir_lower_vtg_io.c b/src/nouveau/compiler/nak_nir_lower_vtg_io.c index d1766876206..32e835fa35a 100644 --- a/src/nouveau/compiler/nak_nir_lower_vtg_io.c +++ b/src/nouveau/compiler/nak_nir_lower_vtg_io.c @@ -144,7 +144,17 @@ lower_vtg_io_intrin(nir_builder *b, nir_def *lo = nir_extract_u8_imm(b, info, 0); nir_def *hi = nir_extract_u8_imm(b, info, 2); nir_def *idx = nir_iadd(b, nir_imul(b, lo, hi), vtx); - vtx = nir_isberd_nv(b, idx); + + const struct nak_nir_isbe_flags flags = { + .mode = NAK_ISBE_MODE_MAP, + .output = false, + .skew = false, + .per_primitive = false, + }; + + vtx = nir_isberd_nv(b, 8, idx, + .flags = NAK_AS_U32(flags), + .access = ACCESS_CAN_REORDER); } else { vtx = nir_vild_nv(b, vtx); } diff --git a/src/nouveau/compiler/nak_private.h 
b/src/nouveau/compiler/nak_private.h index 7c65d8f6662..b312a218e1e 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -257,6 +257,21 @@ struct nak_nir_imadsp_flags { bool nak_nir_lower_vtg_io(nir_shader *nir, const struct nak_compiler *nak); +enum nak_isbe_mode { + NAK_ISBE_MODE_MAP, + NAK_ISBE_MODE_PATCH, + NAK_ISBE_MODE_PRIM, + NAK_ISBE_MODE_ATTR, +}; + +struct nak_nir_isbe_flags { + enum nak_isbe_mode mode : 2; + bool output : 1; + bool skew : 1; + bool per_primitive : 1; + uint32_t pad : 27; +}; + enum nak_interp_mode { NAK_INTERP_MODE_PERSPECTIVE, NAK_INTERP_MODE_SCREEN_LINEAR, From d97ce339facd9d76d03513371dda637879a977f0 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Thu, 5 Feb 2026 09:30:46 +0100 Subject: [PATCH 2/5] nak: Legalize ISBERD This instruction can only take GPRs. Signed-off-by: Mary Guillemard Reviewed-by: Mel Henning --- src/nouveau/compiler/nak/sm70_encode.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs index 8b9a5c24b78..c7fb4437f40 100644 --- a/src/nouveau/compiler/nak/sm70_encode.rs +++ b/src/nouveau/compiler/nak/sm70_encode.rs @@ -3751,8 +3751,8 @@ impl SM70Op for OpCS2R { } impl SM70Op for OpIsberd { - fn legalize(&mut self, _b: &mut LegalizeBuilder) { - // Nothing to do + fn legalize(&mut self, b: &mut LegalizeBuilder) { + b.copy_src_if_uniform(&mut self.idx); } fn encode(&self, e: &mut SM70Encoder<'_>) { From 991c71e20b075971355286e6b02c4e73c2972b8d Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Thu, 5 Feb 2026 10:30:43 +0100 Subject: [PATCH 3/5] nak: Implement ISBEWR and extend ISBERD implementation ISBERD/ISBEWR allow raw manipulation of the various ISBE spaces where attributes are stored. This extends the implementation of ISBERD to support the additional elements added in its intrinsic and implement ISBEWR intrinsic while extending the ISBE space sharing detection pass. 
Signed-off-by: Mary Guillemard Reviewed-by: Mel Henning --- src/nouveau/compiler/nak/from_nir.rs | 94 ++++++++++++++-- src/nouveau/compiler/nak/ir.rs | 105 +++++++++++++++++- .../compiler/nak/opt_instr_sched_common.rs | 2 + .../compiler/nak/sm120_instr_latencies.rs | 1 + src/nouveau/compiler/nak/sm50.rs | 6 +- src/nouveau/compiler/nak/sm70_encode.rs | 67 ++++++++++- .../compiler/nak/sm75_instr_latencies.rs | 1 + .../compiler/nak/sm80_instr_latencies.rs | 1 + src/nouveau/compiler/nak_nir_lower_vtg_io.c | 2 +- src/nouveau/compiler/nak_private.h | 12 +- 10 files changed, 268 insertions(+), 23 deletions(-) diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 8833aac94a6..1deab68d783 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -3033,26 +3033,104 @@ impl<'a> ShaderFromNir<'a> { self.set_dst(&intrin.def, dst.into()); } nir_intrinsic_isberd_nv => { + let base = u16::try_from(intrin.range_base()).unwrap(); + let range = u16::try_from(intrin.range()).unwrap(); + let range = base..(base + range); + let flags = intrin.flags(); let flags: nak_nir_isbe_flags = unsafe { std::mem::transmute_copy(&flags) }; - // TODO: Implement 16 and 32 bits in ISBERD - assert!( - intrin.def.bit_size() == 8 - && intrin.def.num_components == 1 - ); + assert!(intrin.def.num_components() == 1); - // TODO: Implement mode in ISBERD - assert!(flags.mode() == NAK_ISBE_MODE_MAP); + let size_B = intrin.def.bit_size() / 8; + + let access_type = match flags.access() { + NAK_ISBE_ACCESS_MAP => IsbeAccessType::Map, + NAK_ISBE_ACCESS_PATCH => IsbeAccessType::Patch, + NAK_ISBE_ACCESS_PRIM => IsbeAccessType::Primitive, + NAK_ISBE_ACCESS_ATTR => IsbeAccessType::Attribute, + _ => panic!("Invalid ISBE access {}", flags.access()), + }; + + if matches!(access_type, IsbeAccessType::Attribute) + && !flags.per_primitive() + && !range.is_empty() + { + if let ShaderIoInfo::Vtg(io) = &mut self.info.io { + if flags.output() { + 
io.mark_attrs_written(range); + } else { + io.mark_attrs_read(range); + } + } else { + panic!("Must be a VTG stage"); + } + } let dst = b.alloc_ssa(RegFile::GPR); b.push_op(OpIsberd { dst: dst.into(), - idx: self.get_src(&srcs[0]), + offset: self.get_src(&srcs[0]), + imm_offset: 0, + output: flags.output(), + skew: flags.skew(), + mem_type: MemType::from_size(size_B, false), + access_type, }); self.set_dst(&intrin.def, dst.into()); } + nir_intrinsic_isbewr_nv => { + let base = u16::try_from(intrin.range_base()).unwrap(); + let range = u16::try_from(intrin.range()).unwrap(); + let range = base..(base + range); + + let flags = intrin.flags(); + let flags: nak_nir_isbe_flags = + unsafe { std::mem::transmute_copy(&flags) }; + + assert!(srcs[0].num_components() == 1); + + let size_B = srcs[0].bit_size() / 8; + + let access_type = match flags.access() { + NAK_ISBE_ACCESS_MAP => IsbeAccessType::Map, + NAK_ISBE_ACCESS_PATCH => { + panic!("PATCH access is invalid in ISBEWR") + } + NAK_ISBE_ACCESS_PRIM => { + panic!("PRIM access is invalid in ISBEWR") + } + NAK_ISBE_ACCESS_ATTR => IsbeAccessType::Attribute, + _ => panic!("Invalid ISBE access {}", flags.access()), + }; + + if matches!(access_type, IsbeAccessType::Attribute) + && !flags.per_primitive() + && !range.is_empty() + { + if let ShaderIoInfo::Vtg(io) = &mut self.info.io { + if flags.output() { + io.mark_store_req(range.clone()); + io.mark_attrs_written(range); + } else { + io.mark_attrs_read(range); + } + } else { + panic!("Must be a VTG stage"); + } + } + + b.push_op(OpIsbewr { + offset: self.get_src(&srcs[1]), + imm_offset: 0, + data: self.get_src(&srcs[0]), + output: flags.output(), + skew: flags.skew(), + mem_type: MemType::from_size(size_B, false), + access_type, + }); + } nir_intrinsic_vild_nv => { let dst = b.alloc_ssa(RegFile::GPR); let idx = self.get_src(&srcs[0]); diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 0ab8d7cc28b..4c95178ac2c 100644 --- 
a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -7233,23 +7233,108 @@ impl DisplayOp for OpCS2R { } impl_display_for_op!(OpCS2R); +#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub enum IsbeAccessType { + Map, + Patch, + Primitive, + Attribute, +} + +impl fmt::Display for IsbeAccessType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IsbeAccessType::Map => write!(f, "map"), + IsbeAccessType::Patch => write!(f, "patch"), + IsbeAccessType::Primitive => write!(f, "prim"), + IsbeAccessType::Attribute => write!(f, "attr"), + } + } +} + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpIsberd { #[dst_type(GPR)] pub dst: Dst, - #[src_type(SSA)] - pub idx: Src, + #[src_type(GPR)] + pub offset: Src, + + pub imm_offset: u16, + pub mem_type: MemType, + pub access_type: IsbeAccessType, + pub output: bool, + pub skew: bool, } impl DisplayOp for OpIsberd { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "isberd [{}]", self.idx) + write!(f, "isberd")?; + + if self.output { + write!(f, ".o")?; + } + + write!(f, ".{}", self.access_type)?; + + if self.skew { + write!(f, ".skew")?; + } + + write!(f, "{} {}", self.mem_type, self.dst)?; + + if self.imm_offset != 0 { + write!(f, " [{}+0x{:x}]", self.offset, self.imm_offset) + } else { + write!(f, " [{}]", self.offset) + } } } impl_display_for_op!(OpIsberd); +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpIsbewr { + #[src_type(GPR)] + pub offset: Src, + + #[src_type(GPR)] + pub data: Src, + + pub imm_offset: u16, + pub mem_type: MemType, + pub access_type: IsbeAccessType, + pub output: bool, + pub skew: bool, +} + +impl DisplayOp for OpIsbewr { + fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "isbewr")?; + + if self.output { + write!(f, ".o")?; + } + + write!(f, ".{}", self.access_type)?; + + if self.skew { + write!(f, ".skew")?; + } + + write!(f, "{}", self.mem_type)?; + if self.imm_offset != 0 { + 
write!(f, " [{}+0x{:x}]", self.offset, self.imm_offset)?; + } else { + write!(f, " [{}]", self.offset)?; + } + + write!(f, " {}", self.data) + } +} +impl_display_for_op!(OpIsbewr); + /// Vertex Index Load /// (Only available in Kepler) /// @@ -8094,6 +8179,7 @@ pub enum Op { TexDepBar(Box), CS2R(Box), Isberd(Box), + Isbewr(Box), ViLd(Box), Kill(Box), Nop(OpNop), @@ -8279,6 +8365,7 @@ impl Op { | Op::TexDepBar(_) | Op::CS2R(_) | Op::Isberd(_) + | Op::Isbewr(_) | Op::ViLd(_) | Op::Kill(_) | Op::PixLd(_) @@ -8461,6 +8548,7 @@ impl Op { | Op::TexDepBar(_) | Op::CS2R(_) | Op::Isberd(_) + | Op::Isbewr(_) | Op::ViLd(_) | Op::Kill(_) | Op::PixLd(_) @@ -8856,6 +8944,7 @@ impl Instr { | Op::RegOut(_) | Op::Out(_) | Op::OutFinal(_) + | Op::Isbewr(_) | Op::Annotate(_) => false, Op::BMov(op) => !op.clear, _ => true, @@ -9662,8 +9751,14 @@ impl IsbeSpaceSharingStateTracker { } pub fn visit_instr(&mut self, instr: &Instr) { - // Track attribute store. (XXX: ISBEWR) - self.has_attribute_store |= matches!(instr.op, Op::ASt(_)); + // Track attribute store. + match &instr.op { + Op::ASt(_) => self.has_attribute_store = true, + Op::Isbewr(op) if op.access_type == IsbeAccessType::Attribute => { + self.has_attribute_store = true; + } + _ => {} + } // Track attribute load. 
if matches!(instr.op, Op::ALd(_) | Op::Isberd(_)) { diff --git a/src/nouveau/compiler/nak/opt_instr_sched_common.rs b/src/nouveau/compiler/nak/opt_instr_sched_common.rs index d65cbc182c6..2d8be486f6d 100644 --- a/src/nouveau/compiler/nak/opt_instr_sched_common.rs +++ b/src/nouveau/compiler/nak/opt_instr_sched_common.rs @@ -213,6 +213,7 @@ pub fn side_effect_type(op: &Op) -> SideEffect { | Op::TexDepBar(_) | Op::CS2R(_) | Op::Isberd(_) + | Op::Isbewr(_) | Op::ViLd(_) | Op::Kill(_) | Op::S2R(_) => SideEffect::Barrier, @@ -329,6 +330,7 @@ pub fn estimate_variable_latency(sm: &ShaderModelInfo, op: &Op) -> u32 { | Op::TexDepBar(_) | Op::CS2R(_) | Op::Isberd(_) + | Op::Isbewr(_) | Op::ViLd(_) | Op::Kill(_) | Op::PixLd(_) diff --git a/src/nouveau/compiler/nak/sm120_instr_latencies.rs b/src/nouveau/compiler/nak/sm120_instr_latencies.rs index f8bc6569017..2a275ca596b 100644 --- a/src/nouveau/compiler/nak/sm120_instr_latencies.rs +++ b/src/nouveau/compiler/nak/sm120_instr_latencies.rs @@ -159,6 +159,7 @@ fn op_reg_latency(op: &Op, reader: bool, op_reg_idx: usize) -> RegLatencySM100 { Op::SuAtom(_) => Decoupled, Op::PixLd(_) => DecoupledAgu, Op::Isberd(_) => DecoupledAgu, + Op::Isbewr(_) => DecoupledAgu, Op::LdTram(_) => DecoupledAgu, Op::Shfl(_) => DecoupledAgu, Op::Ldsm(_) => DecoupledAgu, diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index f9aa923454d..14f51b65ba5 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -3153,9 +3153,13 @@ impl SM50Op for OpIsberd { } fn encode(&self, e: &mut SM50Encoder<'_>) { + assert!( + self.access_type == IsbeAccessType::Map && self.imm_offset == 0 + ); + e.set_opcode(0xefd0); e.set_dst(&self.dst); - e.set_reg_src(8..16, &self.idx); + e.set_reg_src(8..16, &self.offset); } } diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs index c7fb4437f40..967a69c7e0e 100644 --- a/src/nouveau/compiler/nak/sm70_encode.rs +++ 
b/src/nouveau/compiler/nak/sm70_encode.rs @@ -3752,13 +3752,75 @@ impl SM70Op for OpCS2R { impl SM70Op for OpIsberd { fn legalize(&mut self, b: &mut LegalizeBuilder) { - b.copy_src_if_uniform(&mut self.idx); + b.copy_src_if_uniform(&mut self.offset); } fn encode(&self, e: &mut SM70Encoder<'_>) { + // The immediate offset is only supported on SM86+ + assert!(e.sm >= 86 || self.imm_offset == 0); + e.set_opcode(0x923); e.set_dst(&self.dst); - e.set_reg_src(24..32, &self.idx); + e.set_reg_src(24..32, &self.offset); + e.set_field(40..56, self.imm_offset); + e.set_field( + 74..76, + match self.mem_type { + MemType::U8 => 0_u8, + MemType::U16 => 1_u8, + MemType::B32 => 2_u8, + _ => panic!("Invalid ISBERD mem type"), + }, + ); + e.set_field( + 76..78, + match self.access_type { + IsbeAccessType::Map => 0_u8, + IsbeAccessType::Patch => 1_u8, + IsbeAccessType::Primitive => 2_u8, + IsbeAccessType::Attribute => 3_u8, + }, + ); + e.set_bit(78, self.skew); + e.set_bit(79, self.output); + } +} + +impl SM70Op for OpIsbewr { + fn legalize(&mut self, b: &mut LegalizeBuilder) { + b.copy_src_if_uniform(&mut self.offset); + b.copy_src_if_uniform(&mut self.data); + } + + fn encode(&self, e: &mut SM70Encoder<'_>) { + assert!(e.sm >= 75); + + // The immediate offset is only supported on SM86+ + assert!(e.sm >= 86 || self.imm_offset == 0); + + e.set_opcode(0x927); + e.set_reg_src(24..32, &self.offset); + e.set_reg_src(32..40, &self.data); + e.set_field(40..56, self.imm_offset); + e.set_field( + 74..76, + match self.mem_type { + MemType::U8 => 0_u8, + MemType::U16 => 1_u8, + MemType::B32 => 2_u8, + _ => panic!("Invalid ISBEWR mem type"), + }, + ); + e.set_field( + 76..78, + match self.access_type { + IsbeAccessType::Map => 0_u8, + IsbeAccessType::Attribute => 3_u8, + _ => panic!("Invalid ISBEWR access type"), + }, + ); + e.set_bit(78, self.skew); + e.set_bit(79, self.output); } } @@ -4156,6 +4218,7 @@ macro_rules! 
sm70_op_match { Op::Bar($x) => $y, Op::CS2R($x) => $y, Op::Isberd($x) => $y, + Op::Isbewr($x) => $y, Op::Kill($x) => $y, Op::Nop($x) => $y, Op::PixLd($x) => $y, diff --git a/src/nouveau/compiler/nak/sm75_instr_latencies.rs b/src/nouveau/compiler/nak/sm75_instr_latencies.rs index f9b90b3b669..fd0f5dbbca3 100644 --- a/src/nouveau/compiler/nak/sm75_instr_latencies.rs +++ b/src/nouveau/compiler/nak/sm75_instr_latencies.rs @@ -194,6 +194,7 @@ impl RegLatencySM75 { Op::SuAtom(_) => Decoupled, Op::PixLd(_) => Decoupled, Op::Isberd(_) => Decoupled, + Op::Isbewr(_) => Decoupled, Op::LdTram(_) => Decoupled, Op::Shfl(_) => Decoupled, Op::Ldsm(_) => Decoupled, diff --git a/src/nouveau/compiler/nak/sm80_instr_latencies.rs b/src/nouveau/compiler/nak/sm80_instr_latencies.rs index d0a6d9ae46c..26a3449b1b6 100644 --- a/src/nouveau/compiler/nak/sm80_instr_latencies.rs +++ b/src/nouveau/compiler/nak/sm80_instr_latencies.rs @@ -243,6 +243,7 @@ impl RegLatencySM80 { Op::SuAtom(_) => Decoupled, Op::PixLd(_) => DecoupledAgu, Op::Isberd(_) => DecoupledAgu, + Op::Isbewr(_) => DecoupledAgu, Op::LdTram(_) => DecoupledAgu, Op::Shfl(_) => DecoupledAgu, Op::Ldsm(_) => DecoupledAgu, diff --git a/src/nouveau/compiler/nak_nir_lower_vtg_io.c b/src/nouveau/compiler/nak_nir_lower_vtg_io.c index 32e835fa35a..e48fe34aec8 100644 --- a/src/nouveau/compiler/nak_nir_lower_vtg_io.c +++ b/src/nouveau/compiler/nak_nir_lower_vtg_io.c @@ -146,7 +146,7 @@ lower_vtg_io_intrin(nir_builder *b, nir_def *idx = nir_iadd(b, nir_imul(b, lo, hi), vtx); const struct nak_nir_isbe_flags flags = { - .mode = NAK_ISBE_MODE_MAP, + .access = NAK_ISBE_ACCESS_MAP, .output = false, .skew = false, .per_primitive = false, diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h index b312a218e1e..5fc66f98c9a 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -257,15 +257,15 @@ struct nak_nir_imadsp_flags { bool nak_nir_lower_vtg_io(nir_shader *nir, const struct 
nak_compiler *nak); -enum nak_isbe_mode { - NAK_ISBE_MODE_MAP, - NAK_ISBE_MODE_PATCH, - NAK_ISBE_MODE_PRIM, - NAK_ISBE_MODE_ATTR, +enum nak_isbe_access { + NAK_ISBE_ACCESS_MAP, + NAK_ISBE_ACCESS_PATCH, + NAK_ISBE_ACCESS_PRIM, + NAK_ISBE_ACCESS_ATTR, }; struct nak_nir_isbe_flags { - enum nak_isbe_mode mode : 2; + enum nak_isbe_access access : 2; bool output : 1; bool skew : 1; bool per_primitive : 1; From 11f57eba85904551d9bb1c707fd1b7c4312cc9e5 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Tue, 10 Mar 2026 18:20:10 +0100 Subject: [PATCH 4/5] nak/nvdisasm_tests: Test ISBERD and ISBEWR Signed-off-by: Mary Guillemard Reviewed-by: Mel Henning --- src/nouveau/compiler/nak/nvdisasm_tests.rs | 127 +++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/src/nouveau/compiler/nak/nvdisasm_tests.rs b/src/nouveau/compiler/nak/nvdisasm_tests.rs index 8d574383d7c..f703bebec83 100644 --- a/src/nouveau/compiler/nak/nvdisasm_tests.rs +++ b/src/nouveau/compiler/nak/nvdisasm_tests.rs @@ -882,3 +882,130 @@ pub fn test_plop3() { c.check(sm); } } + +#[test] +pub fn test_isberd() { + let r1 = RegRef::new(RegFile::GPR, 1, 1); + let r2 = RegRef::new(RegFile::GPR, 2, 1); + + let mem_types = [ + (MemType::U8, ""), + (MemType::U16, ".u16"), + (MemType::B32, ".32"), + ]; + + let output_type = [(false, ""), (true, ".o")]; + let skew_type = [(false, ""), (true, ".skew")]; + let access_type_list = [ + (IsbeAccessType::Map, ""), + (IsbeAccessType::Patch, ".patch"), + (IsbeAccessType::Primitive, ".prim"), + (IsbeAccessType::Attribute, ".attr"), + ]; + + for &sm in sm_list() { + if sm < 50 { + continue; + } + + let mut c = DisasmCheck::new(); + for (output, output_str) in output_type { + for (access_type, access_type_str) in access_type_list { + if access_type != IsbeAccessType::Map && sm < 75 { + continue; + } + + for (skew, skew_str) in skew_type { + if skew && sm < 75 { + continue; + } + + for (mem_type, mem_type_str) in mem_types { + if mem_type != MemType::U8 && sm < 75 { + 
continue; + } + + for imm_offset in [0, 0x42] { + if imm_offset != 0 && sm < 86 { + continue; + } + + let instr = OpIsberd { + dst: Dst::Reg(r1), + offset: SrcRef::Reg(r2).into(), + imm_offset, + mem_type, + access_type, + output, + skew, + }; + let disasm = if imm_offset != 0 { + format!("isberd{output_str}{access_type_str}{skew_str}{mem_type_str} r1, [r2+0x{imm_offset:x}];") + } else { + format!("isberd{output_str}{access_type_str}{skew_str}{mem_type_str} r1, [r2];") + }; + c.push(instr, disasm); + } + } + } + } + } + + c.check(sm); + } +} + +#[test] +pub fn test_isbewr() { + let r1 = RegRef::new(RegFile::GPR, 1, 1); + let r2 = RegRef::new(RegFile::GPR, 2, 1); + + let mem_types = [ + (MemType::U8, ""), + (MemType::U16, ".u16"), + (MemType::B32, ".32"), + ]; + + let skew_type = [(false, ""), (true, ".skew")]; + let access_type_list = [ + (IsbeAccessType::Map, ""), + (IsbeAccessType::Attribute, ".attr"), + ]; + + for &sm in sm_list() { + if sm < 75 { + continue; + } + + let mut c = DisasmCheck::new(); + for (access_type, access_type_str) in access_type_list { + for (skew, skew_str) in skew_type { + for (mem_type, mem_type_str) in mem_types { + for imm_offset in [0, 0x42] { + if imm_offset != 0 && sm < 86 { + continue; + } + + let instr = OpIsbewr { + offset: SrcRef::Reg(r2).into(), + data: SrcRef::Reg(r1).into(), + imm_offset, + mem_type, + access_type, + output: true, + skew, + }; + let disasm = if imm_offset != 0 { + format!("isbewr.o{access_type_str}{skew_str}{mem_type_str} [r2+0x{imm_offset:x}], r1;") + } else { + format!("isbewr.o{access_type_str}{skew_str}{mem_type_str} [r2], r1;") + }; + c.push(instr, disasm); + } + } + } + } + + c.check(sm); + } +} From 749d644ea3d846c68956f24725fe4676264c6fa9 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Tue, 10 Mar 2026 15:30:23 +0100 Subject: [PATCH 5/5] nir, nvk, nak: Add base to isbewr_nv and isberd_nv On SM86+, we can use a 16-bit unsigned offset along side the register for it. 
This adds a new base index that will be used for it, integration with nir_opt_offsets and a lowering pass to get rid of the base on unsupported generations. Signed-off-by: Mary Guillemard Reviewed-by: Mel Henning --- src/compiler/nir/nir_intrinsics.py | 4 ++-- src/compiler/nir/nir_opt_offsets.c | 4 ++++ src/nouveau/compiler/nak/from_nir.rs | 4 ++-- src/nouveau/compiler/nak_nir.c | 20 ++++++++++++++++++-- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d3e0fa6223a..618db60d340 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2771,12 +2771,12 @@ intrinsic("load_sysval_nv", dest_comp=1, src_comp=[], bit_sizes=[32, 64], # src[] = { offset }. # FLAGS is struct nak_nir_isbe_flags intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[8, 16, 32], - indices=[RANGE_BASE, RANGE, FLAGS, ACCESS], + indices=[BASE, RANGE_BASE, RANGE, FLAGS, ACCESS], flags=[CAN_ELIMINATE]) # src[] = { data, offset }. 
# FLAGS is struct nak_nir_isbe_flags intrinsic("isbewr_nv", src_comp=[0, 1], - indices=[RANGE_BASE, RANGE, FLAGS, ACCESS], flags=[]) + indices=[BASE, RANGE_BASE, RANGE, FLAGS, ACCESS], flags=[]) intrinsic("vild_nv", dest_comp=1, src_comp=[1], bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER], indices=[BASE]) diff --git a/src/compiler/nir/nir_opt_offsets.c b/src/compiler/nir/nir_opt_offsets.c index 06eedf92058..5e53ac297c2 100644 --- a/src/compiler/nir/nir_opt_offsets.c +++ b/src/compiler/nir/nir_opt_offsets.c @@ -383,6 +383,10 @@ process_instr(nir_builder *b, nir_instr *instr, void *s) /* Always signed offset */ case nir_intrinsic_cmat_load_shared_nv: return try_fold_load_store(b, intrin, state, 0, -8388608, 0x7fffff, false); + case nir_intrinsic_isberd_nv: + return try_fold_load_store(b, intrin, state, 0, 0, get_max(state, intrin, 0), false); + case nir_intrinsic_isbewr_nv: + return try_fold_load_store(b, intrin, state, 1, 0, get_max(state, intrin, 0), false); default: return false; } diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 1deab68d783..a19f27b4046 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -3072,7 +3072,7 @@ impl<'a> ShaderFromNir<'a> { b.push_op(OpIsberd { dst: dst.into(), offset: self.get_src(&srcs[0]), - imm_offset: 0, + imm_offset: u16::try_from(intrin.base()).unwrap(), output: flags.output(), skew: flags.skew(), mem_type: MemType::from_size(size_B, false), @@ -3123,7 +3123,7 @@ impl<'a> ShaderFromNir<'a> { b.push_op(OpIsbewr { offset: self.get_src(&srcs[1]), - imm_offset: 0, + imm_offset: u16::try_from(intrin.base()).unwrap(), data: self.get_src(&srcs[0]), output: flags.output(), skew: flags.skew(), diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 16e326867d7..5a48815468d 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1148,8 +1148,19 @@ nak_nir_opt_offset_shift_nv(nir_shader *nir, 
const struct nak_compiler *nak) ); } -const static struct nir_opt_offsets_options nak_offset_options = { -}; +static uint32_t +nak_nir_max_imm_offset(nir_intrinsic_instr *intrin, const void *data) +{ + const struct nak_compiler *nak = data; + + switch (intrin->intrinsic) { + case nir_intrinsic_isberd_nv: + case nir_intrinsic_isbewr_nv: + return nak->sm >= 86 ? UINT16_MAX : 0; + default: + return 0; + } +} void nak_postprocess_nir(nir_shader *nir, @@ -1289,6 +1300,11 @@ nak_postprocess_nir(nir_shader *nir, } OPT(nir, nak_nir_lower_load_store, nak); + + struct nir_opt_offsets_options nak_offset_options = { + .max_offset_cb = nak_nir_max_imm_offset, + .cb_data = nak, + }; OPT(nir, nir_opt_offsets, &nak_offset_options); /* Should run after nir_opt_offsets, because nir_opt_algebraic will move