mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 07:50:11 +01:00
nak/kepler: Refine instruction scheduling
Previously some KeplerA chips failed various dEQP tests when instruction
scheduling was enabled.
In particular, `memory_model.message_passing` had issues where a
`membar` instruction canceled some in-flight predicate writes, and
`barrier.write_image_tess_control_read_image_compute.image_128_r32_uint`
had issues around the `Cont` instruction.
This patch refines instruction scheduling to better match the output of
nvcc, fixing the various failing dEQP tests.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13528
Fixes: c35990c4bc ("nak: Add real instruction dependencies for Kepler")
Signed-off-by: Lorenzo Rossi <snowycoder@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36393>
This commit is contained in:
parent
42088cd602
commit
7375dfd56d
3 changed files with 28 additions and 1 deletions
|
|
@ -766,6 +766,15 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 {
|
|||
uses.for_each_instr_src_mut(instr, |i, c| {
|
||||
c.add_read((ip, i));
|
||||
});
|
||||
// Kepler A membar conflicts with predicate writes
|
||||
if sm.is_kepler_a() && matches!(&instr.op, Op::MemBar(_)) {
|
||||
uses.for_each_pred(|c| {
|
||||
c.add_read((ip, usize::MAX));
|
||||
});
|
||||
uses.for_each_carry(|c| {
|
||||
c.add_read((ip, usize::MAX));
|
||||
});
|
||||
}
|
||||
for (bar, c) in bars.iter_mut().enumerate() {
|
||||
if instr.deps.wt_bar_mask & (1 << bar) != 0 {
|
||||
*c = min_start;
|
||||
|
|
|
|||
|
|
@ -83,6 +83,18 @@ impl<T> RegTracker<T> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls `f` once with a mutable reference to each entry of `self.pred`.
pub fn for_each_pred(&mut self, mut f: impl FnMut(&mut T)) {
|
||||
for p in &mut self.pred[..] {
|
||||
f(p);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls `f` once with a mutable reference to each entry of `self.carry`.
pub fn for_each_carry(&mut self, mut f: impl FnMut(&mut T)) {
|
||||
for c in &mut self.carry {
|
||||
f(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Index<RegRef> for RegTracker<T> {
|
||||
|
|
|
|||
|
|
@ -13,6 +13,9 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 {
|
|||
Op::Ld(_) => 24,
|
||||
Op::ALd(_) => 24,
|
||||
Op::IMul(_) => 15, // This does not apply to imad, right? right???
|
||||
Op::ISetP(_) => 13,
|
||||
Op::PSetP(_) => 13,
|
||||
Op::IAdd2(o) if !o.carry_out.is_none() => 13,
|
||||
Op::Tex(_)
|
||||
| Op::Tld(_)
|
||||
| Op::Tld4(_)
|
||||
|
|
@ -23,7 +26,8 @@ pub fn instr_latency(_sm: u8, op: &Op, _dst_idx: usize) -> u32 {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn instr_exec_latency(_sm: u8, op: &Op) -> u32 {
|
||||
pub fn instr_exec_latency(sm: u8, op: &Op) -> u32 {
|
||||
let is_kepler_a = sm == 30;
|
||||
match op {
|
||||
Op::Tex(_)
|
||||
| Op::Tld(_)
|
||||
|
|
@ -31,6 +35,8 @@ pub fn instr_exec_latency(_sm: u8, op: &Op) -> u32 {
|
|||
| Op::Tmml(_)
|
||||
| Op::Txd(_)
|
||||
| Op::Txq(_) => 17,
|
||||
Op::MemBar(_) => 16,
|
||||
Op::Cont(_) | Op::Brk(_) if is_kepler_a => 5,
|
||||
Op::Exit(_) => 15,
|
||||
_ => 1,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue