From 4ac4bd62c868f9dc662a4b3cbee7eaa03fedfc5d Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 27 Jun 2025 10:22:50 -0400 Subject: [PATCH] nak: Add a new TexDerivMode enum and plumb it through For most generations, this is just plumbing through a false bit. But on Blackwell, we need to set .dxy at least in compute shaders. Also, we had a bunch of .NDV comments on OpTxd but it has never existed there on any hardware generation; it's just a left-over from trying to copy+paste from codegen. Part-of: --- src/nouveau/compiler/nak/from_nir.rs | 14 +++++ src/nouveau/compiler/nak/ir.rs | 59 +++++++++++++++++++++- src/nouveau/compiler/nak/nvdisasm_tests.rs | 2 + src/nouveau/compiler/nak/sm20.rs | 11 ++++ src/nouveau/compiler/nak/sm32.rs | 13 ++++- src/nouveau/compiler/nak/sm50.rs | 13 ++++- src/nouveau/compiler/nak/sm70_encode.rs | 44 ++++++++++++++-- 7 files changed, 146 insertions(+), 10 deletions(-) diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 8c7c78c57ce..e25cdfdde89 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -1877,6 +1877,17 @@ impl<'a> ShaderFromNir<'a> { _ => panic!("Invalid LOD mode"), }; + // Starting with Blackwell B, the shader stage check for derivatives + // is back to defaulting to disabled on compute and instead we have + // a new derivative mode to re-enable it. If lod_mode == Zero, + // there is no implicit derivative so this doesn't matter. 
+ let deriv_mode = + if self.sm.sm() >= 120 && lod_mode != TexLodMode::Zero { + TexDerivMode::DerivXY + } else { + TexDerivMode::Auto + }; + let offset_mode = match flags.offset_mode() { NAK_NIR_OFFSET_MODE_NONE => TexOffsetMode::None, NAK_NIR_OFFSET_MODE_AOFFI => TexOffsetMode::AddOffI, @@ -1908,12 +1919,14 @@ impl<'a> ShaderFromNir<'a> { channel_mask, }); } else if tex.op == nir_texop_lod { + assert!(lod_mode == TexLodMode::Auto); assert!(offset_mode == TexOffsetMode::None); b.push_op(OpTmml { dsts: dsts, tex: tex_ref, srcs: srcs, dim: dim, + deriv_mode, nodep: flags.nodep(), channel_mask, }); @@ -1955,6 +1968,7 @@ impl<'a> ShaderFromNir<'a> { srcs: srcs, dim: dim, lod_mode: lod_mode, + deriv_mode, z_cmpr: flags.has_z_cmpr(), offset_mode, mem_eviction_priority: MemEvictionPriority::Normal, diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 4b82652c874..6aa0caa5bf7 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -1940,6 +1940,55 @@ impl fmt::Display for TexLodMode { } } +/// Derivative behavior for tex ops and FSwzAdd +/// +/// The descriptions here may not be wholly accurate as they come from cobbling +/// together a bunch of pieces. This is my (Faith's) best understanding of how +/// these things work. +#[allow(dead_code)] +#[derive(Clone, Copy, Eq, PartialEq)] +pub enum TexDerivMode { + /// Automatic + /// + /// For partial (not full) quads, the derivative will default to the value + /// of DEFAULT_PARTIAL in SET_SHADER_CONTROL. + /// + /// On Volta and earlier GPUs or on Blackwell B and later, derivatives in + /// all non-fragment shader stages are assumed to be partial. + Auto, + + /// Assume a non-divergent (full) derivative + /// + /// Partial derivative checks are skipped and the hardware does the + /// derivative anyway, possibly on rubbish data. 
+ NonDivergent, + + /// Force the derivative to be considered divergent (partial) + /// + /// This only exists as a separate thing on Blackwell A. On Hopper and + /// earlier, there is a .fdv that's part of the LodMode, but only for + /// LodMode::Clamp. On Blackwell B, it appears (according to the + /// disassembler) to be removed again in favor of DerivXY. + ForceDivergent, + + /// Attempt an X/Y derivative, ignoring shader stage + /// + /// This is (I think) identical to Auto except that it ignores the shader + /// stage checks. This is new on Blackwell B+. + DerivXY, +} + +impl fmt::Display for TexDerivMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TexDerivMode::Auto => Ok(()), + TexDerivMode::NonDivergent => write!(f, ".ndv"), + TexDerivMode::ForceDivergent => write!(f, ".fdv"), + TexDerivMode::DerivXY => write!(f, ".dxy"), + } + } +} + #[derive(Clone, Copy, Eq, PartialEq)] pub struct ChannelMask(u8); @@ -5113,6 +5162,7 @@ pub struct OpTex { pub dim: TexDim, pub lod_mode: TexLodMode, + pub deriv_mode: TexDerivMode, pub z_cmpr: bool, pub offset_mode: TexOffsetMode, pub mem_eviction_priority: MemEvictionPriority, @@ -5122,7 +5172,11 @@ pub struct OpTex { impl DisplayOp for OpTex { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "tex{}{}{}", self.dim, self.lod_mode, self.offset_mode)?; + write!( + f, + "tex{}{}{}{}", + self.dim, self.lod_mode, self.offset_mode, self.deriv_mode + )?; if self.z_cmpr { write!(f, ".dc")?; } @@ -5219,13 +5273,14 @@ pub struct OpTmml { pub srcs: [Src; 2], pub dim: TexDim, + pub deriv_mode: TexDerivMode, pub nodep: bool, pub channel_mask: ChannelMask, } impl DisplayOp for OpTmml { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "tmml.lod{}", self.dim)?; + write!(f, "tmml.lod{}{}", self.dim, self.deriv_mode)?; if self.nodep { write!(f, ".nodep")?; } diff --git a/src/nouveau/compiler/nak/nvdisasm_tests.rs b/src/nouveau/compiler/nak/nvdisasm_tests.rs 
index e42694444c7..866b45cd7e8 100644 --- a/src/nouveau/compiler/nak/nvdisasm_tests.rs +++ b/src/nouveau/compiler/nak/nvdisasm_tests.rs @@ -453,6 +453,7 @@ pub fn test_texture() { dim: TexDim::_2D, lod_mode, + deriv_mode: TexDerivMode::Auto, z_cmpr: false, offset_mode: TexOffsetMode::None, mem_eviction_priority: MemEvictionPriority::First, @@ -527,6 +528,7 @@ pub fn test_texture() { srcs: [SrcRef::Reg(r1).into(), SrcRef::Reg(r3).into()], dim: TexDim::_2D, + deriv_mode: TexDerivMode::Auto, nodep: true, channel_mask: ChannelMask::for_comps(3), }; diff --git a/src/nouveau/compiler/nak/sm20.rs b/src/nouveau/compiler/nak/sm20.rs index 594254d6d97..354740171c7 100644 --- a/src/nouveau/compiler/nak/sm20.rs +++ b/src/nouveau/compiler/nak/sm20.rs @@ -1669,6 +1669,15 @@ impl SM20Encoder<'_> { ); } + fn set_tex_ndv(&mut self, bit: usize, deriv_mode: TexDerivMode) { + let ndv = match deriv_mode { + TexDerivMode::Auto => false, + TexDerivMode::NonDivergent => true, + _ => panic!("{deriv_mode} is not supported"), + }; + self.set_bit(bit, ndv); + } + fn set_tex_channel_mask( &mut self, range: Range, @@ -1719,6 +1728,7 @@ impl SM20Op for OpTex { assert!(self.fault.is_none()); e.set_reg_src(20..26, &self.srcs[0]); e.set_reg_src(26..32, &self.srcs[1]); + e.set_tex_ndv(45, self.deriv_mode); e.set_tex_channel_mask(46..50, self.channel_mask); e.set_tex_dim(51..54, self.dim); e.set_bit(54, self.offset_mode == TexOffsetMode::AddOffI); @@ -1848,6 +1858,7 @@ impl SM20Op for OpTmml { assert!(self.dsts[1].is_none()); e.set_reg_src(20..26, &self.srcs[0]); e.set_reg_src(26..32, &self.srcs[1]); + e.set_tex_ndv(45, self.deriv_mode); e.set_tex_channel_mask(46..50, self.channel_mask); e.set_tex_dim(51..54, self.dim); } diff --git a/src/nouveau/compiler/nak/sm32.rs b/src/nouveau/compiler/nak/sm32.rs index 4fb92b656af..12cec8a5fba 100644 --- a/src/nouveau/compiler/nak/sm32.rs +++ b/src/nouveau/compiler/nak/sm32.rs @@ -1939,6 +1939,15 @@ impl SM32Encoder<'_> { }, ); } + + fn set_tex_ndv(&mut self, 
bit: usize, deriv_mode: TexDerivMode) { + let ndv = match deriv_mode { + TexDerivMode::Auto => false, + TexDerivMode::NonDivergent => true, + _ => panic!("{deriv_mode} is not supported"), + }; + self.set_bit(bit, ndv); + } } /// Helper to legalize texture instructions @@ -1986,7 +1995,7 @@ impl SM32Op for OpTex { e.set_field(34..38, self.channel_mask.to_bits()); e.set_tex_dim(38..41, self.dim); - e.set_bit(41, false); // ToDo: NDV + e.set_tex_ndv(41, self.deriv_mode); e.set_bit(42, self.z_cmpr); e.set_bit(43, self.offset_mode == TexOffsetMode::AddOffI); e.set_tex_lod_mode(44..47, self.lod_mode); @@ -2113,6 +2122,7 @@ impl SM32Op for OpTmml { e.set_field(34..38, self.channel_mask.to_bits()); e.set_tex_dim(38..41, self.dim); + e.set_tex_ndv(41, self.deriv_mode); } } @@ -2149,7 +2159,6 @@ impl SM32Op for OpTxd { e.set_field(34..38, self.channel_mask.to_bits()); e.set_tex_dim(38..41, self.dim); - e.set_bit(41, false); // ToDo: NDV e.set_bit(54, self.offset_mode == TexOffsetMode::AddOffI); } } diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index b884bb7bdd3..7274b008074 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -2121,6 +2121,15 @@ impl SM50Encoder<'_> { ); } + fn set_tex_ndv(&mut self, bit: usize, deriv_mode: TexDerivMode) { + let ndv = match deriv_mode { + TexDerivMode::Auto => false, + TexDerivMode::NonDivergent => true, + _ => panic!("{deriv_mode} is not supported"), + }; + self.set_bit(bit, ndv); + } + fn set_tex_channel_mask( &mut self, range: Range, @@ -2172,7 +2181,7 @@ impl SM50Op for OpTex { e.set_tex_dim(28..31, self.dim); e.set_tex_channel_mask(31..35, self.channel_mask); - e.set_bit(35, false); // ToDo: NDV + e.set_tex_ndv(35, self.deriv_mode); e.set_bit(49, self.nodep); e.set_bit(50, self.z_cmpr); } @@ -2285,7 +2294,7 @@ impl SM50Op for OpTmml { e.set_tex_dim(28..31, self.dim); e.set_tex_channel_mask(31..35, self.channel_mask); - e.set_bit(35, false); // ToDo: NDV + 
e.set_tex_ndv(35, self.deriv_mode); e.set_bit(49, self.nodep); } } diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs index ade429149e4..61f5855f83b 100644 --- a/src/nouveau/compiler/nak/sm70_encode.rs +++ b/src/nouveau/compiler/nak/sm70_encode.rs @@ -2352,6 +2352,39 @@ impl SM70Encoder<'_> { ); } + fn set_tex_ndv(&mut self, bit: usize, deriv_mode: TexDerivMode) { + let ndv = match deriv_mode { + TexDerivMode::Auto => false, + TexDerivMode::NonDivergent => true, + _ => panic!("{deriv_mode} is not supported"), + }; + self.set_bit(bit, ndv); + } + + fn set_tex_deriv_mode( + &mut self, + range: Range, + deriv_mode: TexDerivMode, + ) { + assert!(range.len() == 2); + assert!(self.sm >= 100); + self.set_field( + range, + match deriv_mode { + TexDerivMode::Auto => 0_u8, + TexDerivMode::NonDivergent => 1_u8, + TexDerivMode::ForceDivergent => { + assert!(self.sm >= 100 && self.sm < 110); + 2_u8 + } + TexDerivMode::DerivXY => { + assert!(self.sm >= 120); + 3_u8 + } + }, + ); + } + fn set_image_dim(&mut self, range: Range, dim: ImageDim) { assert!(range.len() == 3); self.set_field( @@ -2450,7 +2483,6 @@ impl SM70Op for OpTex { e.set_tex_dim(61..64, self.dim); e.set_tex_channel_mask(72..76, self.channel_mask); if e.sm >= 100 { - e.set_field(76..78, 3_u8); e.set_field( 56..58, match self.offset_mode { @@ -2459,9 +2491,10 @@ impl SM70Op for OpTex { TexOffsetMode::PerPx => panic!("Illegal offset value"), }, ); + e.set_tex_deriv_mode(76..78, self.deriv_mode); } else { e.set_bit(76, self.offset_mode == TexOffsetMode::AddOffI); - e.set_bit(77, false); // ToDo: NDV + e.set_tex_ndv(77, self.deriv_mode); } e.set_bit(78, self.z_cmpr); e.set_eviction_priority(&self.mem_eviction_priority); @@ -2644,7 +2677,11 @@ impl SM70Op for OpTmml { e.set_tex_dim(61..64, self.dim); e.set_tex_channel_mask(72..76, self.channel_mask); - e.set_bit(77, false); // ToDo: NDV + if e.sm >= 100 { + e.set_tex_deriv_mode(76..78, self.deriv_mode); + } else { + 
e.set_tex_ndv(77, self.deriv_mode); + } e.set_bit(90, self.nodep); } } @@ -2705,7 +2742,6 @@ impl SM70Op for OpTxd { e.set_tex_dim(61..64, self.dim); e.set_tex_channel_mask(72..76, self.channel_mask); - e.set_bit(77, false); // ToDo: NDV e.set_eviction_priority(&self.mem_eviction_priority); e.set_bit(90, self.nodep); }