diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs
index 74e2f9cbe17..53b9a2ec1fe 100644
--- a/src/nouveau/compiler/nak/api.rs
+++ b/src/nouveau/compiler/nak/api.rs
@@ -252,7 +252,7 @@ impl ShaderBin {
         let c_info = nak_shader_info {
             stage: match info.stage {
                 ShaderStageInfo::Compute(_) => MESA_SHADER_COMPUTE,
-                ShaderStageInfo::Vertex => MESA_SHADER_VERTEX,
+                ShaderStageInfo::Vertex(_) => MESA_SHADER_VERTEX,
                 ShaderStageInfo::Fragment(_) => MESA_SHADER_FRAGMENT,
                 ShaderStageInfo::Geometry(_) => MESA_SHADER_GEOMETRY,
                 ShaderStageInfo::TessellationInit(_) => MESA_SHADER_TESS_CTRL,
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index b808335a537..0ea053e8c02 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -47,7 +47,9 @@ fn init_info_from_nir(nak: &nak_compiler, nir: &nir_shader) -> ShaderInfo {
                     smem_size: nir.info.shared_size.try_into().unwrap(),
                 })
             }
-            MESA_SHADER_VERTEX => ShaderStageInfo::Vertex,
+            MESA_SHADER_VERTEX => ShaderStageInfo::Vertex(VertexShaderInfo {
+                isbe_space_sharing_enable: false,
+            }),
             MESA_SHADER_FRAGMENT => {
                 let info_fs = unsafe { &nir.info.__bindgen_anon_1.fs };
                 ShaderStageInfo::Fragment(FragmentShaderInfo {
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index f8aed97b6be..d949e280716 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -13,6 +13,7 @@ use crate::sph::{OutputTopology, PixelImap};
 pub use crate::ssa_value::*;
 use compiler::as_slice::*;
 use compiler::cfg::CFG;
+use compiler::dataflow::ForwardDataflow;
 use compiler::smallvec::SmallVec;
 use nak_ir_proc::*;
 use std::cmp::{max, min};
@@ -8900,6 +8901,14 @@ pub struct ComputeShaderInfo {
     pub smem_size: u16,
 }
 
+/// Stage-specific shader info for vertex shaders.
+#[derive(Debug)]
+pub struct VertexShaderInfo {
+    /// Whether ISBE space sharing may be enabled in the shader program
+    /// header, i.e. no attribute load can follow an attribute store.
+    pub isbe_space_sharing_enable: bool,
+}
+
 #[derive(Debug)]
 pub struct FragmentShaderInfo {
     pub uses_kill: bool,
@@ -8971,7 +8980,7 @@ pub struct TessellationShaderInfo {
 #[derive(Debug)]
 pub enum ShaderStageInfo {
     Compute(ComputeShaderInfo),
-    Vertex,
+    Vertex(VertexShaderInfo),
     Fragment(FragmentShaderInfo),
     Geometry(GeometryShaderInfo),
     TessellationInit(TessellationInitShaderInfo),
@@ -9315,6 +9324,108 @@ pub struct Shader<'a> {
     pub functions: Vec<Function>,
 }
 
+/// Per-block state for deciding whether ISBE space sharing is safe.
+///
+/// Space sharing is rejected as soon as an attribute load can execute after
+/// an attribute store.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+struct IsbeSpaceSharingStateTracker {
+    /// An attribute store is in this block or reaches it from a predecessor.
+    has_attribute_store: bool,
+    /// This block itself contains an attribute load.
+    has_attribute_load: bool,
+    /// No attribute load has been found after an attribute store so far.
+    can_overlap_io: bool,
+}
+
+impl IsbeSpaceSharingStateTracker {
+    /// Returns the state of an empty block: no loads, no stores, sharing OK.
+    pub const fn new() -> Self {
+        Self {
+            has_attribute_store: false,
+            has_attribute_load: false,
+            can_overlap_io: true,
+        }
+    }
+
+    /// Updates the state with one instruction, visited in program order.
+    pub fn visit_instr(&mut self, instr: &Instr) {
+        // Track attribute store. (XXX: ISBEWR)
+        self.has_attribute_store |= matches!(instr.op, Op::ASt(_));
+
+        // Track attribute load.
+        if matches!(instr.op, Op::ALd(_) | Op::Isberd(_)) {
+            self.has_attribute_load = true;
+
+            // If we have any attribute load after an attribute store,
+            // we cannot overlap IO.
+            if self.has_attribute_store {
+                self.can_overlap_io = false;
+            }
+        }
+    }
+
+    /// Joins the state flowing out of a predecessor block into `self`.
+    fn merge(&mut self, other: &Self) {
+        // Propagate details on attribute store and overlap IO.
+        self.has_attribute_store |= other.has_attribute_store;
+        self.can_overlap_io &= other.can_overlap_io;
+
+        // If a previous block has any attribute store and we found an
+        // attribute load, we cannot overlap IO.
+        if other.has_attribute_store && self.has_attribute_load {
+            self.can_overlap_io = false;
+        }
+    }
+}
+
+/// Returns true if no attribute load in `f` can execute after an attribute
+/// store, which is the condition for enabling ISBE space sharing.
+fn can_isbe_space_sharing_be_enabled(f: &Function) -> bool {
+    // Local pass: summarize each block on its own and bail out early if a
+    // single block already has a load after a store.
+    let mut state_in = Vec::with_capacity(f.blocks.len());
+    for block in &f.blocks {
+        let mut sim = IsbeSpaceSharingStateTracker::new();
+
+        for instr in block.instrs.iter() {
+            sim.visit_instr(instr);
+        }
+
+        if !sim.can_overlap_io {
+            return false;
+        }
+
+        state_in.push(sim);
+    }
+
+    let mut state_out: Vec<_> = (0..f.blocks.len())
+        .map(|_| IsbeSpaceSharingStateTracker::new())
+        .collect();
+
+    // Global pass: propagate stores forward through the CFG so that a load
+    // in a successor block is seen as following a store in a predecessor.
+    ForwardDataflow {
+        cfg: &f.blocks,
+        block_in: &mut state_in[..],
+        block_out: &mut state_out[..],
+        transfer: |_block_idx, _block, sim_out, sim_in| {
+            if sim_out == sim_in {
+                false
+            } else {
+                *sim_out = *sim_in;
+                true
+            }
+        },
+        join: |sim_in, pred_sim_out| {
+            sim_in.merge(pred_sim_out);
+        },
+    }
+    .solve();
+
+    state_in.iter().all(|state| state.can_overlap_io)
+}
+
 impl Shader<'_> {
     pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) {
         for func in &self.functions {
@@ -9376,6 +9487,15 @@ impl Shader<'_> {
         self.info.max_warps_per_sm = max_warps_per_sm(
             self.info.num_gprs as u32 + self.sm.hw_reserved_gprs(),
         );
+
+        // ISBE space sharing is only computed for vertex shaders on SM50+.
+        if self.sm.sm() >= 50 {
+            if let ShaderStageInfo::Vertex(vertex_info) = &mut self.info.stage {
+                assert_eq!(self.functions.len(), 1);
+                vertex_info.isbe_space_sharing_enable =
+                    can_isbe_space_sharing_be_enabled(&self.functions[0]);
+            }
+        }
     }
 }
 
diff --git a/src/nouveau/compiler/nak/sph.rs b/src/nouveau/compiler/nak/sph.rs
index 36fd8768efb..3ed4cfb1530 100644
--- a/src/nouveau/compiler/nak/sph.rs
+++ b/src/nouveau/compiler/nak/sph.rs
@@ -30,7 +30,7 @@ pub enum ShaderType {
 impl From<&ShaderStageInfo> for ShaderType {
     fn from(value: &ShaderStageInfo) -> Self {
         match value {
-            ShaderStageInfo::Vertex => ShaderType::Vertex,
+            ShaderStageInfo::Vertex(_) => ShaderType::Vertex,
             ShaderStageInfo::Fragment(_) => ShaderType::Fragment,
             ShaderStageInfo::Geometry(_) => ShaderType::Geometry,
             ShaderStageInfo::TessellationInit(_) => {
@@ -230,6 +230,17 @@ impl ShaderProgramHeader {
         self.set_bit(24, gs_passthrough_enable);
     }
 
+    /// Sets the ISBE space sharing enable flag (passed to `set_bit` as
+    /// bit 25). Panics if this header is not for a vertex shader.
+    #[inline]
+    pub fn set_isbe_space_sharing_enable(
+        &mut self,
+        isbe_space_sharing_enable: bool,
+    ) {
+        assert!(self.shader_type == ShaderType::Vertex);
+        self.set_bit(25, isbe_space_sharing_enable);
+    }
+
     #[inline]
     pub fn set_does_load_or_store(&mut self, does_load_or_store: bool) {
         self.set_field(SPHV3_T1_DOES_LOAD_OR_STORE, does_load_or_store);
@@ -535,6 +546,9 @@ pub fn encode_header(
     }
 
     match &shader_info.stage {
+        ShaderStageInfo::Vertex(stage) => {
+            sph.set_isbe_space_sharing_enable(stage.isbe_space_sharing_enable);
+        }
         ShaderStageInfo::Fragment(stage) => {
             let zs_self_dep = fs_key.is_some_and(|key| key.zs_self_dep);
             sph.set_kills_pixels(stage.uses_kill || zs_self_dep);