From 7375dfd56da07d12dcb3ff4a9bc9047a5d7d3dcd Mon Sep 17 00:00:00 2001 From: Lorenzo Rossi Date: Wed, 13 Aug 2025 18:24:41 +0200 Subject: [PATCH] nak/kepler: Refine instruction scheduling Previously, some Kepler A chips failed various dEQP tests when instruction scheduling was enabled. In particular, `memory_model.message_passing` had issues where a `membar` instruction canceled some in-flight predicate writes, and `barrier.write_image_tess_control_read_image_compute.image_128_r32_uint` had issues around the `Cont` instruction. This patch refines instruction scheduling to better match the output of nvcc, fixing the various failing dEQP tests. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13528 Fixes: c35990c4bcb ("nak: Add real instruction dependencies for Kepler") Signed-off-by: Lorenzo Rossi Part-of: --- src/nouveau/compiler/nak/calc_instr_deps.rs | 9 +++++++++ src/nouveau/compiler/nak/reg_tracker.rs | 12 ++++++++++++ src/nouveau/compiler/nak/sm30_instr_latencies.rs | 8 +++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/nouveau/compiler/nak/calc_instr_deps.rs b/src/nouveau/compiler/nak/calc_instr_deps.rs index d54f27392e8..e442c9e4751 100644 --- a/src/nouveau/compiler/nak/calc_instr_deps.rs +++ b/src/nouveau/compiler/nak/calc_instr_deps.rs @@ -766,6 +766,15 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 { uses.for_each_instr_src_mut(instr, |i, c| { c.add_read((ip, i)); }); + // Kepler A membar conflicts with predicate writes + if sm.is_kepler_a() && matches!(&instr.op, Op::MemBar(_)) { + uses.for_each_pred(|c| { + c.add_read((ip, usize::MAX)); + }); + uses.for_each_carry(|c| { + c.add_read((ip, usize::MAX)); + }); + } for (bar, c) in bars.iter_mut().enumerate() { if instr.deps.wt_bar_mask & (1 << bar) != 0 { *c = min_start; diff --git a/src/nouveau/compiler/nak/reg_tracker.rs b/src/nouveau/compiler/nak/reg_tracker.rs index 1f422786980..49341ca1e8f 100644 --- a/src/nouveau/compiler/nak/reg_tracker.rs +++ 
b/src/nouveau/compiler/nak/reg_tracker.rs @@ -83,6 +83,18 @@ impl RegTracker { } } } + + pub fn for_each_pred(&mut self, mut f: impl FnMut(&mut T)) { + for p in &mut self.pred[..] { + f(p); + } + } + + pub fn for_each_carry(&mut self, mut f: impl FnMut(&mut T)) { + for c in &mut self.carry { + f(c); + } + } } impl Index for RegTracker { diff --git a/src/nouveau/compiler/nak/sm30_instr_latencies.rs b/src/nouveau/compiler/nak/sm30_instr_latencies.rs index 011da013382..41980ae3fca 100644 --- a/src/nouveau/compiler/nak/sm30_instr_latencies.rs +++ b/src/nouveau/compiler/nak/sm30_instr_latencies.rs @@ -13,6 +13,9 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 { Op::Ld(_) => 24, Op::ALd(_) => 24, Op::IMul(_) => 15, // This does not apply to imad, right? right??? + Op::ISetP(_) => 13, + Op::PSetP(_) => 13, + Op::IAdd2(o) if !o.carry_out.is_none() => 13, Op::Tex(_) | Op::Tld(_) | Op::Tld4(_) @@ -23,7 +26,8 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 { } } -pub fn instr_exec_latency(_sm: u8, op: &Op) -> u32 { +pub fn instr_exec_latency(sm: u8, op: &Op) -> u32 { + let is_kepler_a = sm == 30; match op { Op::Tex(_) | Op::Tld(_) @@ -31,6 +35,8 @@ pub fn instr_exec_latency(_sm: u8, op: &Op) -> u32 { | Op::Tmml(_) | Op::Txd(_) | Op::Txq(_) => 17, + Op::MemBar(_) => 16, + Op::Cont(_) | Op::Brk(_) if is_kepler_a => 5, Op::Exit(_) => 15, _ => 1, }