diff --git a/src/nouveau/compiler/nak/calc_instr_deps.rs b/src/nouveau/compiler/nak/calc_instr_deps.rs index d54f27392e8..e442c9e4751 100644 --- a/src/nouveau/compiler/nak/calc_instr_deps.rs +++ b/src/nouveau/compiler/nak/calc_instr_deps.rs @@ -766,6 +766,15 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 { uses.for_each_instr_src_mut(instr, |i, c| { c.add_read((ip, i)); }); + // Kepler A membar conflicts with predicate writes + if sm.is_kepler_a() && matches!(&instr.op, Op::MemBar(_)) { + uses.for_each_pred(|c| { + c.add_read((ip, usize::MAX)); + }); + uses.for_each_carry(|c| { + c.add_read((ip, usize::MAX)); + }); + } for (bar, c) in bars.iter_mut().enumerate() { if instr.deps.wt_bar_mask & (1 << bar) != 0 { *c = min_start; diff --git a/src/nouveau/compiler/nak/reg_tracker.rs b/src/nouveau/compiler/nak/reg_tracker.rs index 1f422786980..49341ca1e8f 100644 --- a/src/nouveau/compiler/nak/reg_tracker.rs +++ b/src/nouveau/compiler/nak/reg_tracker.rs @@ -83,6 +83,18 @@ impl RegTracker { } } } + + pub fn for_each_pred(&mut self, mut f: impl FnMut(&mut T)) { + for p in &mut self.pred[..] { + f(p); + } + } + + pub fn for_each_carry(&mut self, mut f: impl FnMut(&mut T)) { + for c in &mut self.carry { + f(c); + } + } } impl Index for RegTracker { diff --git a/src/nouveau/compiler/nak/sm30_instr_latencies.rs b/src/nouveau/compiler/nak/sm30_instr_latencies.rs index 011da013382..41980ae3fca 100644 --- a/src/nouveau/compiler/nak/sm30_instr_latencies.rs +++ b/src/nouveau/compiler/nak/sm30_instr_latencies.rs @@ -13,6 +13,9 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 { Op::Ld(_) => 24, Op::ALd(_) => 24, Op::IMul(_) => 15, // This does not apply to imad, right? right??? + Op::ISetP(_) => 13, + Op::PSetP(_) => 13, + Op::IAdd2(o) if !o.carry_out.is_none() => 13, Op::Tex(_) | Op::Tld(_) | Op::Tld4(_) @@ -23,7 +26,8 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 { } } -pub fn instr_exec_latency(_sm: u8, op: &Op) -> u32 { +pub fn instr_exec_latency(sm: u8, op: &Op) -> u32 { + let is_kepler_a = sm == 30; match op { Op::Tex(_) | Op::Tld(_) @@ -31,6 +35,8 @@ pub fn instr_exec_latency(_sm: u8, op: &Op) -> u32 { | Op::Tmml(_) | Op::Txd(_) | Op::Txq(_) => 17, + Op::MemBar(_) => 16, + Op::Cont(_) | Op::Brk(_) if is_kepler_a => 5, Op::Exit(_) => 15, _ => 1, }