nak: Add latency_upper_bound to ShaderModel

Signed-off-by: Lorenzo Rossi <git@rossilorenzo.dev>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37108>
2026-05-07 02:48:06 +02:00 · 2025-10-30 13:42:04 +01:00 · 2025-10-30 13:42:04 +01:00 · dac8fc93d9
commit dac8fc93d9
parent f1eb6d7d7b
6 changed files with 38 additions and 2 deletions
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@ -9215,6 +9215,14 @@ pub trait ShaderModel {
    /// Worst-case access-after-write latency
    fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32;

+    /// Upper bound on latency
+    ///
+    /// Every `*_latency` function must return a latency no greater than
+    /// this bound, e.g. self.war_latency() <= self.latency_upper_bound().
+    /// This is only used for compile-time optimization.  If unsure, be
+    /// conservative and return a larger value.
+    fn latency_upper_bound(&self) -> u32;
+
    /// Maximum encodable instruction delay
    fn max_instr_delay(&self) -> u8;

--- a/src/nouveau/compiler/nak/sm20.rs
+++ b/src/nouveau/compiler/nak/sm20.rs
@ -7,7 +7,7 @@ use crate::legalize::{
 };
 use crate::sm30_instr_latencies::{
    encode_kepler_shader, instr_exec_latency, instr_latency,
-    KeplerInstructionEncoder,
+    latency_upper_bound, KeplerInstructionEncoder,
 };
 use bitview::*;

@ -104,6 +104,10 @@ impl ShaderModel for ShaderModel20 {
        13
    }

+    fn latency_upper_bound(&self) -> u32 {
+        latency_upper_bound()
+    }
+
    fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
        instr_latency(self.sm, write, dst_idx)
    }
--- a/src/nouveau/compiler/nak/sm30_instr_latencies.rs
+++ b/src/nouveau/compiler/nak/sm30_instr_latencies.rs
@ -26,6 +26,10 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 {
    }
 }

+pub fn latency_upper_bound() -> u32 {
+    24
+}
+
 pub fn instr_exec_latency(sm: u8, op: &Op) -> u32 {
    let is_kepler_a = sm == 30;
    match op {
--- a/src/nouveau/compiler/nak/sm32.rs
+++ b/src/nouveau/compiler/nak/sm32.rs
@ -8,7 +8,7 @@ use crate::legalize::{
 };
 use crate::sm30_instr_latencies::{
    encode_kepler_shader, instr_exec_latency, instr_latency,
-    KeplerInstructionEncoder,
+    latency_upper_bound, KeplerInstructionEncoder,
 };
 use bitview::{
    BitMutView, BitMutViewable, BitView, BitViewable, SetBit, SetField,
@ -110,6 +110,10 @@ impl ShaderModel for ShaderModel32 {
        instr_latency(self.sm, write, dst_idx)
    }

+    fn latency_upper_bound(&self) -> u32 {
+        latency_upper_bound()
+    }
+
    fn max_instr_delay(&self) -> u8 {
        32
    }
--- a/src/nouveau/compiler/nak/sm50.rs
+++ b/src/nouveau/compiler/nak/sm50.rs
@ -151,6 +151,10 @@ impl ShaderModel for ShaderModel50 {
        13
    }

+    fn latency_upper_bound(&self) -> u32 {
+        14
+    }
+
    fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
        instr_latency(self.sm, write, dst_idx)
    }
--- a/src/nouveau/compiler/nak/sm70.rs
+++ b/src/nouveau/compiler/nak/sm70.rs
@ -260,6 +260,18 @@ impl ShaderModel for ShaderModel70 {
        }
    }

+    fn latency_upper_bound(&self) -> u32 {
+        if self.is_blackwell() {
+            30
+        } else if self.is_ampere() || self.is_ada() {
+            30
+        } else if self.is_turing() {
+            25
+        } else {
+            15
+        }
+    }
+
    fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
        if self.is_blackwell() {
            SM120Latency::raw(write, dst_idx, None, 0)