From 072ea76a471434d18b9eea166e79dc044c823290 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Fri, 28 Nov 2025 23:15:17 +0100 Subject: [PATCH] nvk: Implement ISBE space sharing on vertex stage When a vertex shader does not load any input attributes after any store of output attributes, we can enable bit 25 of SPH ("ISBE space sharing"). Effectively this seems to allow input and output attributes to live in the same allocated space in ISBE and could improve occupancy. Found while researching geometry passthrough and mesh shaders. Signed-off-by: Mary Guillemard Reviewed-by: Mel Henning Part-of: --- src/nouveau/compiler/nak/api.rs | 2 +- src/nouveau/compiler/nak/from_nir.rs | 4 +- src/nouveau/compiler/nak/ir.rs | 110 ++++++++++++++++++++++++++- src/nouveau/compiler/nak/sph.rs | 14 +++- 4 files changed, 126 insertions(+), 4 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 74e2f9cbe17..53b9a2ec1fe 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -252,7 +252,7 @@ impl ShaderBin { let c_info = nak_shader_info { stage: match info.stage { ShaderStageInfo::Compute(_) => MESA_SHADER_COMPUTE, - ShaderStageInfo::Vertex => MESA_SHADER_VERTEX, + ShaderStageInfo::Vertex(_) => MESA_SHADER_VERTEX, ShaderStageInfo::Fragment(_) => MESA_SHADER_FRAGMENT, ShaderStageInfo::Geometry(_) => MESA_SHADER_GEOMETRY, ShaderStageInfo::TessellationInit(_) => MESA_SHADER_TESS_CTRL, diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index b808335a537..0ea053e8c02 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -47,7 +47,9 @@ fn init_info_from_nir(nak: &nak_compiler, nir: &nir_shader) -> ShaderInfo { smem_size: nir.info.shared_size.try_into().unwrap(), }) } - MESA_SHADER_VERTEX => ShaderStageInfo::Vertex, + MESA_SHADER_VERTEX => ShaderStageInfo::Vertex(VertexShaderInfo { + isbe_space_sharing_enable: false, + }), 
MESA_SHADER_FRAGMENT => { let info_fs = unsafe { &nir.info.__bindgen_anon_1.fs }; ShaderStageInfo::Fragment(FragmentShaderInfo { diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index f8aed97b6be..d949e280716 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -13,6 +13,7 @@ use crate::sph::{OutputTopology, PixelImap}; pub use crate::ssa_value::*; use compiler::as_slice::*; use compiler::cfg::CFG; +use compiler::dataflow::ForwardDataflow; use compiler::smallvec::SmallVec; use nak_ir_proc::*; use std::cmp::{max, min}; @@ -8900,6 +8901,11 @@ pub struct ComputeShaderInfo { pub smem_size: u16, } +#[derive(Debug)] +pub struct VertexShaderInfo { + pub isbe_space_sharing_enable: bool, +} + #[derive(Debug)] pub struct FragmentShaderInfo { pub uses_kill: bool, @@ -8971,7 +8977,7 @@ pub struct TessellationShaderInfo { #[derive(Debug)] pub enum ShaderStageInfo { Compute(ComputeShaderInfo), - Vertex, + Vertex(VertexShaderInfo), Fragment(FragmentShaderInfo), Geometry(GeometryShaderInfo), TessellationInit(TessellationInitShaderInfo), @@ -9315,6 +9321,98 @@ pub struct Shader<'a> { pub functions: Vec, } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +struct IsbeSpaceSharingStateTracker { + has_attribute_store: bool, + has_attribute_load: bool, + can_overlap_io: bool, +} + +impl IsbeSpaceSharingStateTracker { + pub const fn new() -> Self { + Self { + has_attribute_store: false, + has_attribute_load: false, + can_overlap_io: true, + } + } + + pub fn visit_instr(&mut self, instr: &Instr) { + // Track attribute store. (XXX: ISBEWR) + self.has_attribute_store |= matches!(instr.op, Op::ASt(_)); + + // Track attribute load. + if matches!(instr.op, Op::ALd(_) | Op::Isberd(_)) { + self.has_attribute_load = true; + + // If we have any attribute load after an attribute store, + // we cannot overlap IO. 
+ if self.has_attribute_store { + self.can_overlap_io = false; + } + } + } + + fn merge(&mut self, other: &Self) { + // Propagate details on attribute store and overlap IO. + self.has_attribute_store |= other.has_attribute_store; + self.can_overlap_io &= other.can_overlap_io; + + // If a previous block has any attribute store and we found an attribute load, + // we cannot overlap IO. + if other.has_attribute_store && self.has_attribute_load { + self.can_overlap_io = false; + } + } +} + +fn can_isbe_space_sharing_be_enabled(f: &Function) -> bool { + let mut state_in = Vec::new(); + for block in &f.blocks { + let mut sim = IsbeSpaceSharingStateTracker::new(); + + for instr in block.instrs.iter() { + sim.visit_instr(&instr); + } + + if !sim.can_overlap_io { + return false; + } + + state_in.push(sim); + } + + let mut state_out: Vec<_> = (0..f.blocks.len()) + .map(|_| IsbeSpaceSharingStateTracker::new()) + .collect(); + + ForwardDataflow { + cfg: &f.blocks, + block_in: &mut state_in[..], + block_out: &mut state_out[..], + transfer: |_block_idx, _block, sim_out, sim_in| { + if sim_out == sim_in { + false + } else { + *sim_out = *sim_in; + true + } + }, + join: |sim_in, pred_sim_out| { + sim_in.merge(pred_sim_out); + }, + } + .solve(); + + for state in state_in { + if !state.can_overlap_io { + return false; + } + } + + true +} + impl Shader<'_> { pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) { for func in &self.functions { @@ -9376,6 +9474,16 @@ impl Shader<'_> { self.info.max_warps_per_sm = max_warps_per_sm( self.info.num_gprs as u32 + self.sm.hw_reserved_gprs(), ); + + if self.sm.sm() >= 50 { + if let ShaderStageInfo::Vertex(vertex_info) = &mut self.info.stage { + assert!(self.functions.len() == 1); + vertex_info.isbe_space_sharing_enable = + can_isbe_space_sharing_be_enabled( + self.functions.get(0).unwrap(), + ); + } + } } } diff --git a/src/nouveau/compiler/nak/sph.rs b/src/nouveau/compiler/nak/sph.rs index 36fd8768efb..3ed4cfb1530 100644 --- 
a/src/nouveau/compiler/nak/sph.rs +++ b/src/nouveau/compiler/nak/sph.rs @@ -30,7 +30,7 @@ pub enum ShaderType { impl From<&ShaderStageInfo> for ShaderType { fn from(value: &ShaderStageInfo) -> Self { match value { - ShaderStageInfo::Vertex => ShaderType::Vertex, + ShaderStageInfo::Vertex(_) => ShaderType::Vertex, ShaderStageInfo::Fragment(_) => ShaderType::Fragment, ShaderStageInfo::Geometry(_) => ShaderType::Geometry, ShaderStageInfo::TessellationInit(_) => { @@ -230,6 +230,15 @@ impl ShaderProgramHeader { self.set_bit(24, gs_passthrough_enable); } + #[inline] + pub fn set_isbe_space_sharing_enable( + &mut self, + isbe_space_sharing_enable: bool, + ) { + assert!(self.shader_type == ShaderType::Vertex); + self.set_bit(25, isbe_space_sharing_enable); + } + #[inline] pub fn set_does_load_or_store(&mut self, does_load_or_store: bool) { self.set_field(SPHV3_T1_DOES_LOAD_OR_STORE, does_load_or_store); @@ -535,6 +544,9 @@ pub fn encode_header( } match &shader_info.stage { + ShaderStageInfo::Vertex(stage) => { + sph.set_isbe_space_sharing_enable(stage.isbe_space_sharing_enable); + } ShaderStageInfo::Fragment(stage) => { let zs_self_dep = fs_key.is_some_and(|key| key.zs_self_dep); sph.set_kills_pixels(stage.uses_kill || zs_self_dep);