nak: add ShaderModel::latency_upper_bound()

Adds a required `latency_upper_bound(&self) -> u32` method to the
`ShaderModel` trait and implements it in each backend touched below:

  - sm20, sm32: forward to the new free function
    sm30_instr_latencies::latency_upper_bound(), which returns 24
  - sm50: returns 14
  - sm70: returns 30 on Blackwell, Ampere and Ada, 25 on Turing,
    and 15 otherwise

NOTE(review): in the sm70.rs hunk the `is_blackwell()` branch and the
`is_ampere() || is_ada()` branch both return 30; presumably kept separate
on purpose so each generation can be tuned independently -- confirm.

NOTE(review): indentation and blank context lines were reconstructed so
that every hunk matches the line counts in its `@@` header; verify against
the tree before applying.

diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 13857b8bae9..188daa56fab 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -9215,6 +9215,14 @@ pub trait ShaderModel {
     /// Worst-case access-after-write latency
     fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32;
 
+    /// Upper bound on latency
+    ///
+    /// Every '*_latency' function must return latencies that are
+    /// bounded. Ex: self.war_latency() <= self.latency_upper_bound().
+    /// This is only used for compile-time optimization. If unsure, be
+    /// conservative.
+    fn latency_upper_bound(&self) -> u32;
+
     /// Maximum encodable instruction delay
     fn max_instr_delay(&self) -> u8;
 
diff --git a/src/nouveau/compiler/nak/sm20.rs b/src/nouveau/compiler/nak/sm20.rs
index 88e50dbbf21..2d98273542d 100644
--- a/src/nouveau/compiler/nak/sm20.rs
+++ b/src/nouveau/compiler/nak/sm20.rs
@@ -7,7 +7,7 @@ use crate::legalize::{
 };
 use crate::sm30_instr_latencies::{
     encode_kepler_shader, instr_exec_latency, instr_latency,
-    KeplerInstructionEncoder,
+    latency_upper_bound, KeplerInstructionEncoder,
 };
 use bitview::*;
 
@@ -104,6 +104,10 @@ impl ShaderModel for ShaderModel20 {
         13
     }
 
+    fn latency_upper_bound(&self) -> u32 {
+        latency_upper_bound()
+    }
+
     fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
         instr_latency(self.sm, write, dst_idx)
     }
diff --git a/src/nouveau/compiler/nak/sm30_instr_latencies.rs b/src/nouveau/compiler/nak/sm30_instr_latencies.rs
index 0b2cc7607d0..5fe9346784b 100644
--- a/src/nouveau/compiler/nak/sm30_instr_latencies.rs
+++ b/src/nouveau/compiler/nak/sm30_instr_latencies.rs
@@ -26,6 +26,10 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 {
     }
 }
 
+pub fn latency_upper_bound() -> u32 {
+    24
+}
+
 pub fn instr_exec_latency(sm: u8, op: &Op) -> u32 {
     let is_kepler_a = sm == 30;
     match op {
diff --git a/src/nouveau/compiler/nak/sm32.rs b/src/nouveau/compiler/nak/sm32.rs
index 76ad114c958..3ad5b4be013 100644
--- a/src/nouveau/compiler/nak/sm32.rs
+++ b/src/nouveau/compiler/nak/sm32.rs
@@ -8,7 +8,7 @@ use crate::legalize::{
 };
 use crate::sm30_instr_latencies::{
     encode_kepler_shader, instr_exec_latency, instr_latency,
-    KeplerInstructionEncoder,
+    latency_upper_bound, KeplerInstructionEncoder,
 };
 use bitview::{
     BitMutView, BitMutViewable, BitView, BitViewable, SetBit, SetField,
@@ -110,6 +110,10 @@ impl ShaderModel for ShaderModel32 {
         instr_latency(self.sm, write, dst_idx)
     }
 
+    fn latency_upper_bound(&self) -> u32 {
+        latency_upper_bound()
+    }
+
     fn max_instr_delay(&self) -> u8 {
         32
     }
diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs
index 34c3f83942e..5a861d0df9b 100644
--- a/src/nouveau/compiler/nak/sm50.rs
+++ b/src/nouveau/compiler/nak/sm50.rs
@@ -151,6 +151,10 @@ impl ShaderModel for ShaderModel50 {
         13
     }
 
+    fn latency_upper_bound(&self) -> u32 {
+        14
+    }
+
     fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
         instr_latency(self.sm, write, dst_idx)
     }
diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs
index 309657caea6..26f882520d0 100644
--- a/src/nouveau/compiler/nak/sm70.rs
+++ b/src/nouveau/compiler/nak/sm70.rs
@@ -260,6 +260,18 @@ impl ShaderModel for ShaderModel70 {
         }
     }
 
+    fn latency_upper_bound(&self) -> u32 {
+        if self.is_blackwell() {
+            30
+        } else if self.is_ampere() || self.is_ada() {
+            30
+        } else if self.is_turing() {
+            25
+        } else {
+            15
+        }
+    }
+
     fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
         if self.is_blackwell() {
             SM120Latency::raw(write, dst_idx, None, 0)