nak: Add a new TexDerivMode enum and plumb it through

For most generations, this is just plumbing through a false bit.  But on
Blackwell, we need to set .dxy at least in compute shaders.

Also, we had a bunch of .NDV comments on OpTxd, but it has never existed
there on any hardware generation; it's just a left-over from trying to
copy+paste from codegen.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35795>
This commit is contained in:
Faith Ekstrand 2025-06-27 10:22:50 -04:00 committed by Marge Bot
parent c6ad70551b
commit 4ac4bd62c8
7 changed files with 146 additions and 10 deletions

View file

@ -1877,6 +1877,17 @@ impl<'a> ShaderFromNir<'a> {
_ => panic!("Invalid LOD mode"),
};
// Starting with Blackwell B, the shader stage check for derivatives
// is back to defaulting to disabled on compute and instead we have
// a new derivative mode to re-enable it. If tex_lod_mode == Zero,
// there is no implicit derivative so this doesn't matter.
let deriv_mode =
if self.sm.sm() >= 120 && lod_mode != TexLodMode::Zero {
TexDerivMode::DerivXY
} else {
TexDerivMode::Auto
};
let offset_mode = match flags.offset_mode() {
NAK_NIR_OFFSET_MODE_NONE => TexOffsetMode::None,
NAK_NIR_OFFSET_MODE_AOFFI => TexOffsetMode::AddOffI,
@ -1908,12 +1919,14 @@ impl<'a> ShaderFromNir<'a> {
channel_mask,
});
} else if tex.op == nir_texop_lod {
assert!(lod_mode == TexLodMode::Auto);
assert!(offset_mode == TexOffsetMode::None);
b.push_op(OpTmml {
dsts: dsts,
tex: tex_ref,
srcs: srcs,
dim: dim,
deriv_mode,
nodep: flags.nodep(),
channel_mask,
});
@ -1955,6 +1968,7 @@ impl<'a> ShaderFromNir<'a> {
srcs: srcs,
dim: dim,
lod_mode: lod_mode,
deriv_mode,
z_cmpr: flags.has_z_cmpr(),
offset_mode,
mem_eviction_priority: MemEvictionPriority::Normal,

View file

@ -1940,6 +1940,55 @@ impl fmt::Display for TexLodMode {
}
}
/// How a texture op (or FSwzAdd) treats its implicit derivative
///
/// Caveat from the original author (Faith): these descriptions were cobbled
/// together from a number of separate pieces and may not be wholly accurate.
/// They record the best current understanding of how the hardware behaves.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexDerivMode {
    /// Let the hardware decide
    ///
    /// When a quad is partial (not full), the derivative falls back to the
    /// DEFAULT_PARTIAL value from SET_SHADER_CONTROL.
    ///
    /// Volta and earlier GPUs, as well as Blackwell B and later, assume that
    /// derivatives in every non-fragment shader stage are partial.
    Auto,
    /// Treat the derivative as non-divergent (full)
    ///
    /// The partial-derivative checks are skipped, so the hardware computes
    /// the derivative regardless, possibly from rubbish data.
    NonDivergent,
    /// Treat the derivative as divergent (partial) no matter what
    ///
    /// Only Blackwell A has this as its own mode.  Hopper and earlier have a
    /// .fdv that is part of the LodMode instead, and only for LodMode::Clamp.
    /// Judging by the disassembler, Blackwell B appears to drop it again in
    /// favor of DerivXY.
    ForceDivergent,
    /// Try an X/Y derivative without looking at the shader stage
    ///
    /// Believed (not confirmed) to behave exactly like Auto minus the shader
    /// stage checks.  New on Blackwell B+.
    DerivXY,
}

/// Prints the instruction-modifier suffix for the mode; `Auto` prints nothing.
impl fmt::Display for TexDerivMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            // The default mode has no modifier, so nothing is written at all.
            TexDerivMode::Auto => return Ok(()),
            TexDerivMode::NonDivergent => ".ndv",
            TexDerivMode::ForceDivergent => ".fdv",
            TexDerivMode::DerivXY => ".dxy",
        };
        f.write_str(suffix)
    }
}
#[derive(Clone, Copy, Eq, PartialEq)]
pub struct ChannelMask(u8);
@ -5113,6 +5162,7 @@ pub struct OpTex {
pub dim: TexDim,
pub lod_mode: TexLodMode,
pub deriv_mode: TexDerivMode,
pub z_cmpr: bool,
pub offset_mode: TexOffsetMode,
pub mem_eviction_priority: MemEvictionPriority,
@ -5122,7 +5172,11 @@ pub struct OpTex {
impl DisplayOp for OpTex {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "tex{}{}{}", self.dim, self.lod_mode, self.offset_mode)?;
write!(
f,
"tex{}{}{}{}",
self.dim, self.lod_mode, self.offset_mode, self.deriv_mode
)?;
if self.z_cmpr {
write!(f, ".dc")?;
}
@ -5219,13 +5273,14 @@ pub struct OpTmml {
pub srcs: [Src; 2],
pub dim: TexDim,
pub deriv_mode: TexDerivMode,
pub nodep: bool,
pub channel_mask: ChannelMask,
}
impl DisplayOp for OpTmml {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "tmml.lod{}", self.dim)?;
write!(f, "tmml.lod{}{}", self.dim, self.deriv_mode)?;
if self.nodep {
write!(f, ".nodep")?;
}

View file

@ -453,6 +453,7 @@ pub fn test_texture() {
dim: TexDim::_2D,
lod_mode,
deriv_mode: TexDerivMode::Auto,
z_cmpr: false,
offset_mode: TexOffsetMode::None,
mem_eviction_priority: MemEvictionPriority::First,
@ -527,6 +528,7 @@ pub fn test_texture() {
srcs: [SrcRef::Reg(r1).into(), SrcRef::Reg(r3).into()],
dim: TexDim::_2D,
deriv_mode: TexDerivMode::Auto,
nodep: true,
channel_mask: ChannelMask::for_comps(3),
};

View file

@ -1669,6 +1669,15 @@ impl SM20Encoder<'_> {
);
}
/// Writes the single NDV bit at `bit` from `deriv_mode`.
///
/// `Auto` clears the bit and `NonDivergent` sets it.  Any other mode
/// panics, since a single bit has no way to represent it.
fn set_tex_ndv(&mut self, bit: usize, deriv_mode: TexDerivMode) {
    let is_non_divergent = match deriv_mode {
        TexDerivMode::NonDivergent => true,
        TexDerivMode::Auto => false,
        TexDerivMode::ForceDivergent | TexDerivMode::DerivXY => {
            panic!("{deriv_mode} is not supported")
        }
    };
    self.set_bit(bit, is_non_divergent);
}
fn set_tex_channel_mask(
&mut self,
range: Range<usize>,
@ -1719,6 +1728,7 @@ impl SM20Op for OpTex {
assert!(self.fault.is_none());
e.set_reg_src(20..26, &self.srcs[0]);
e.set_reg_src(26..32, &self.srcs[1]);
e.set_tex_ndv(45, self.deriv_mode);
e.set_tex_channel_mask(46..50, self.channel_mask);
e.set_tex_dim(51..54, self.dim);
e.set_bit(54, self.offset_mode == TexOffsetMode::AddOffI);
@ -1848,6 +1858,7 @@ impl SM20Op for OpTmml {
assert!(self.dsts[1].is_none());
e.set_reg_src(20..26, &self.srcs[0]);
e.set_reg_src(26..32, &self.srcs[1]);
e.set_tex_ndv(45, self.deriv_mode);
e.set_tex_channel_mask(46..50, self.channel_mask);
e.set_tex_dim(51..54, self.dim);
}

View file

@ -1939,6 +1939,15 @@ impl SM32Encoder<'_> {
},
);
}
/// Writes the single NDV bit at `bit` from `deriv_mode`.
///
/// `Auto` clears the bit and `NonDivergent` sets it.  Any other mode
/// panics, since a single bit has no way to represent it.
fn set_tex_ndv(&mut self, bit: usize, deriv_mode: TexDerivMode) {
    let is_non_divergent = match deriv_mode {
        TexDerivMode::NonDivergent => true,
        TexDerivMode::Auto => false,
        TexDerivMode::ForceDivergent | TexDerivMode::DerivXY => {
            panic!("{deriv_mode} is not supported")
        }
    };
    self.set_bit(bit, is_non_divergent);
}
}
/// Helper to legalize texture instructions
@ -1986,7 +1995,7 @@ impl SM32Op for OpTex {
e.set_field(34..38, self.channel_mask.to_bits());
e.set_tex_dim(38..41, self.dim);
e.set_bit(41, false); // ToDo: NDV
e.set_tex_ndv(41, self.deriv_mode);
e.set_bit(42, self.z_cmpr);
e.set_bit(43, self.offset_mode == TexOffsetMode::AddOffI);
e.set_tex_lod_mode(44..47, self.lod_mode);
@ -2113,6 +2122,7 @@ impl SM32Op for OpTmml {
e.set_field(34..38, self.channel_mask.to_bits());
e.set_tex_dim(38..41, self.dim);
e.set_tex_ndv(41, self.deriv_mode);
}
}
@ -2149,7 +2159,6 @@ impl SM32Op for OpTxd {
e.set_field(34..38, self.channel_mask.to_bits());
e.set_tex_dim(38..41, self.dim);
e.set_bit(41, false); // ToDo: NDV
e.set_bit(54, self.offset_mode == TexOffsetMode::AddOffI);
}
}

View file

@ -2121,6 +2121,15 @@ impl SM50Encoder<'_> {
);
}
/// Writes the single NDV bit at `bit` from `deriv_mode`.
///
/// `Auto` clears the bit and `NonDivergent` sets it.  Any other mode
/// panics, since a single bit has no way to represent it.
fn set_tex_ndv(&mut self, bit: usize, deriv_mode: TexDerivMode) {
    let is_non_divergent = match deriv_mode {
        TexDerivMode::NonDivergent => true,
        TexDerivMode::Auto => false,
        TexDerivMode::ForceDivergent | TexDerivMode::DerivXY => {
            panic!("{deriv_mode} is not supported")
        }
    };
    self.set_bit(bit, is_non_divergent);
}
fn set_tex_channel_mask(
&mut self,
range: Range<usize>,
@ -2172,7 +2181,7 @@ impl SM50Op for OpTex {
e.set_tex_dim(28..31, self.dim);
e.set_tex_channel_mask(31..35, self.channel_mask);
e.set_bit(35, false); // ToDo: NDV
e.set_tex_ndv(35, self.deriv_mode);
e.set_bit(49, self.nodep);
e.set_bit(50, self.z_cmpr);
}
@ -2285,7 +2294,7 @@ impl SM50Op for OpTmml {
e.set_tex_dim(28..31, self.dim);
e.set_tex_channel_mask(31..35, self.channel_mask);
e.set_bit(35, false); // ToDo: NDV
e.set_tex_ndv(35, self.deriv_mode);
e.set_bit(49, self.nodep);
}
}

View file

@ -2352,6 +2352,39 @@ impl SM70Encoder<'_> {
);
}
/// Writes the single NDV bit at `bit` from `deriv_mode`.
///
/// `Auto` clears the bit and `NonDivergent` sets it.  Any other mode
/// panics, since a single bit has no way to represent it.
fn set_tex_ndv(&mut self, bit: usize, deriv_mode: TexDerivMode) {
    let is_non_divergent = match deriv_mode {
        TexDerivMode::NonDivergent => true,
        TexDerivMode::Auto => false,
        TexDerivMode::ForceDivergent | TexDerivMode::DerivXY => {
            panic!("{deriv_mode} is not supported")
        }
    };
    self.set_bit(bit, is_non_divergent);
}
/// Writes `deriv_mode` into the two-bit field `range`.
///
/// Panics unless `range` is exactly two bits wide and the target is
/// SM100 or newer.  `ForceDivergent` additionally requires an SM below
/// 110 and `DerivXY` requires SM120 or newer.
fn set_tex_deriv_mode(
    &mut self,
    range: Range<usize>,
    deriv_mode: TexDerivMode,
) {
    assert!(range.len() == 2);
    assert!(self.sm >= 100);

    // Work out the field value first; the per-mode SM checks fire before
    // anything is written to the instruction.
    let field_bits = match deriv_mode {
        TexDerivMode::Auto => 0_u8,
        TexDerivMode::NonDivergent => 1_u8,
        TexDerivMode::ForceDivergent => {
            assert!(self.sm >= 100 && self.sm < 110);
            2_u8
        }
        TexDerivMode::DerivXY => {
            assert!(self.sm >= 120);
            3_u8
        }
    };
    self.set_field(range, field_bits);
}
fn set_image_dim(&mut self, range: Range<usize>, dim: ImageDim) {
assert!(range.len() == 3);
self.set_field(
@ -2450,7 +2483,6 @@ impl SM70Op for OpTex {
e.set_tex_dim(61..64, self.dim);
e.set_tex_channel_mask(72..76, self.channel_mask);
if e.sm >= 100 {
e.set_field(76..78, 3_u8);
e.set_field(
56..58,
match self.offset_mode {
@ -2459,9 +2491,10 @@ impl SM70Op for OpTex {
TexOffsetMode::PerPx => panic!("Illegal offset value"),
},
);
e.set_tex_deriv_mode(76..78, self.deriv_mode);
} else {
e.set_bit(76, self.offset_mode == TexOffsetMode::AddOffI);
e.set_bit(77, false); // ToDo: NDV
e.set_tex_ndv(77, self.deriv_mode);
}
e.set_bit(78, self.z_cmpr);
e.set_eviction_priority(&self.mem_eviction_priority);
@ -2644,7 +2677,11 @@ impl SM70Op for OpTmml {
e.set_tex_dim(61..64, self.dim);
e.set_tex_channel_mask(72..76, self.channel_mask);
e.set_bit(77, false); // ToDo: NDV
if e.sm >= 100 {
e.set_tex_deriv_mode(76..78, self.deriv_mode);
} else {
e.set_tex_ndv(77, self.deriv_mode);
}
e.set_bit(90, self.nodep);
}
}
@ -2705,7 +2742,6 @@ impl SM70Op for OpTxd {
e.set_tex_dim(61..64, self.dim);
e.set_tex_channel_mask(72..76, self.channel_mask);
e.set_bit(77, false); // ToDo: NDV
e.set_eviction_priority(&self.mem_eviction_priority);
e.set_bit(90, self.nodep);
}