mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
nak: Re-work Instr::get_latency()
We re-order and re-arrange the whole thing by instruction type. Also, instead of returning an Option<u32>, have a has_fixed_latency() method to check the instruction and then get_dst_latency() to get the latency from instruction launch to the given destination index being available. This lets us handle predicates properly which have a different number of cycles for some reason. Oh, it's now just as correct as the estimates in nv50_ir_target_gm107.cpp. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
parent
3ad3f1376c
commit
a07aa29f61
2 changed files with 78 additions and 40 deletions
|
|
@ -231,7 +231,7 @@ fn assign_barriers(f: &mut Function) {
|
|||
wait_mask &= !(1 << bar);
|
||||
}
|
||||
|
||||
if instr.get_latency().is_some() {
|
||||
if instr.has_fixed_latency() {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -301,10 +301,15 @@ fn calc_delays(f: &mut Function) {
|
|||
let mut ready = RegTracker::new(0_u32);
|
||||
for instr in b.instrs.iter_mut().rev() {
|
||||
let mut min_start = cycle + 1; /* TODO: co-issue */
|
||||
if let Some(latency) = instr.get_latency() {
|
||||
ready.for_each_instr_dst_mut(instr, |c| {
|
||||
min_start = max(min_start, *c + latency);
|
||||
});
|
||||
if instr.has_fixed_latency() {
|
||||
for (idx, dst) in instr.dsts().iter().enumerate() {
|
||||
if let Dst::Reg(reg) = dst {
|
||||
let latency = instr.get_dst_latency(idx);
|
||||
for c in &ready[*reg] {
|
||||
min_start = max(min_start, *c + latency);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let delay = min_start - cycle;
|
||||
|
|
|
|||
|
|
@ -3977,53 +3977,73 @@ impl Instr {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn get_latency(&self) -> Option<u32> {
|
||||
pub fn has_fixed_latency(&self) -> bool {
|
||||
match self.op {
|
||||
// Float ALU
|
||||
Op::FAdd(_)
|
||||
| Op::FFma(_)
|
||||
| Op::FMnMx(_)
|
||||
| Op::FMul(_)
|
||||
| Op::FSet(_)
|
||||
| Op::FSetP(_)
|
||||
| Op::MuFu(_)
|
||||
| Op::DAdd(_)
|
||||
| Op::IAbs(_)
|
||||
| Op::FSetP(_) => true,
|
||||
|
||||
// Multi-function unit is variable latency
|
||||
Op::MuFu(_) => false,
|
||||
|
||||
// Double-precision float ALU
|
||||
Op::DAdd(_) => false,
|
||||
|
||||
// Integer ALU
|
||||
Op::Brev(_) | Op::Flo(_) | Op::PopC(_) => false,
|
||||
Op::IAbs(_)
|
||||
| Op::INeg(_)
|
||||
| Op::IAdd3(_)
|
||||
| Op::IAdd3X(_)
|
||||
| Op::IMad(_)
|
||||
| Op::IMad64(_)
|
||||
| Op::IMnMx(_)
|
||||
| Op::Lop3(_)
|
||||
| Op::PLop3(_)
|
||||
| Op::ISetP(_)
|
||||
| Op::Shf(_) => Some(6),
|
||||
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::Mov(_) | Op::FRnd(_) => {
|
||||
Some(15)
|
||||
}
|
||||
Op::Sel(_) => Some(15),
|
||||
Op::CS2R(_) => None,
|
||||
Op::S2R(_) => None,
|
||||
Op::ALd(_) => None,
|
||||
Op::ASt(_) => Some(15),
|
||||
Op::Ipa(_) => None,
|
||||
Op::Tex(_) => None,
|
||||
Op::Tld(_) => None,
|
||||
Op::Tld4(_) => None,
|
||||
Op::Tmml(_) => None,
|
||||
Op::Txd(_) => None,
|
||||
Op::Txq(_) => None,
|
||||
Op::SuLd(_) => None,
|
||||
Op::SuSt(_) => None,
|
||||
Op::SuAtom(_) => None,
|
||||
Op::Ld(_) => None,
|
||||
Op::Ldc(_) => None,
|
||||
Op::St(_) => None,
|
||||
Op::Atom(_) => None,
|
||||
Op::AtomCas(_) => None,
|
||||
Op::MemBar(_) => None,
|
||||
Op::Bar(_) => None,
|
||||
Op::Bra(_) | Op::Exit(_) => Some(15),
|
||||
| Op::Lop3(_)
|
||||
| Op::Shf(_) => true,
|
||||
|
||||
// Conversions are variable latency?!?
|
||||
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::FRnd(_) => false,
|
||||
|
||||
// Move ops
|
||||
Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
|
||||
|
||||
// Predicate ops
|
||||
Op::PLop3(_) => true,
|
||||
|
||||
// Texture ops
|
||||
Op::Tex(_)
|
||||
| Op::Tld(_)
|
||||
| Op::Tld4(_)
|
||||
| Op::Tmml(_)
|
||||
| Op::Txd(_)
|
||||
| Op::Txq(_) => false,
|
||||
|
||||
// Surface ops
|
||||
Op::SuLd(_) | Op::SuSt(_) | Op::SuAtom(_) => false,
|
||||
|
||||
// Memory ops
|
||||
Op::Ld(_)
|
||||
| Op::Ldc(_)
|
||||
| Op::St(_)
|
||||
| Op::Atom(_)
|
||||
| Op::AtomCas(_)
|
||||
| Op::ALd(_)
|
||||
| Op::ASt(_)
|
||||
| Op::Ipa(_)
|
||||
| Op::MemBar(_) => false,
|
||||
|
||||
// Control-flow ops
|
||||
Op::Bra(_) | Op::Exit(_) => true,
|
||||
|
||||
// Miscellaneous ops
|
||||
Op::Bar(_) | Op::CS2R(_) | Op::S2R(_) => false,
|
||||
|
||||
// Virtual ops
|
||||
Op::Undef(_)
|
||||
| Op::PhiSrcs(_)
|
||||
| Op::PhiDsts(_)
|
||||
|
|
@ -4033,7 +4053,20 @@ impl Instr {
|
|||
| Op::FSOut(_) => {
|
||||
panic!("Not a hardware opcode")
|
||||
}
|
||||
Op::PopC(_) | Op::Brev(_) | Op::Flo(_) | Op::Prmt(_) => Some(15),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the latency, in cycles, from instruction launch until the
/// destination at index `dst_idx` is available.
///
/// Callers are expected to check `has_fixed_latency()` first; this is
/// enforced with a `debug_assert!` only, so release builds do not check it.
///
/// The result depends solely on the destination's register file:
/// * `Dst::None` yields `0`,
/// * a destination in a predicate file yields `13`,
/// * any other destination yields `6`.
///
/// NOTE(review): `dst_idx` is used to index `self.dsts()` directly, so an
/// out-of-range index presumably panics -- confirm against `dsts()`.
pub fn get_dst_latency(&self, dst_idx: usize) -> u32 {
|
||||
debug_assert!(self.has_fixed_latency());
|
||||
let file = match self.dsts()[dst_idx] {
|
||||
Dst::None => return 0,
|
||||
Dst::SSA(vec) => vec.file(),
|
||||
Dst::Reg(reg) => reg.file(),
|
||||
};
|
||||
if file.is_predicate() {
|
||||
13
|
||||
} else {
|
||||
6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue