mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
nak: Add latency helpers to ShaderModel and use them
For now, these all just call into sched_common.rs but this gives us the interface we really want going forward. Reviewed-by: Dave Airlie <airlied@redhat.com> Reviewed-by: Mel Henning <mhenning@darkrefraction.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34302>
This commit is contained in:
parent
3112fbcc56
commit
8e2e1e43fa
3 changed files with 70 additions and 52 deletions
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
use crate::api::{GetDebugFlags, DEBUG};
|
||||
use crate::ir::*;
|
||||
use crate::sched_common::*;
|
||||
use crate::sched_common::RegTracker;
|
||||
|
||||
use std::cmp::max;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
|
@ -257,7 +257,7 @@ fn assign_barriers(f: &mut Function, sm: &dyn ShaderModel) {
|
|||
waits.extend_from_slice(u.deps());
|
||||
});
|
||||
|
||||
if instr.needs_scoreboard(sm.sm()) {
|
||||
if sm.op_needs_scoreboard(&instr.op) {
|
||||
let (rd, wr) = deps.add_instr(bi, ip);
|
||||
uses.for_each_instr_src_mut(instr, |_, u| {
|
||||
// Only mark a dep as signaled if we actually have
|
||||
|
|
@ -314,7 +314,7 @@ fn assign_barriers(f: &mut Function, sm: &dyn ShaderModel) {
|
|||
instr.deps.set_yield(true);
|
||||
}
|
||||
|
||||
if !instr.needs_scoreboard(sm.sm()) {
|
||||
if !sm.op_needs_scoreboard(&instr.op) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -360,7 +360,7 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 {
|
|||
|
||||
for ip in (0..b.instrs.len()).rev() {
|
||||
let instr = &b.instrs[ip];
|
||||
let mut min_start = cycle + exec_latency(sm.sm(), &instr.op);
|
||||
let mut min_start = cycle + sm.exec_latency(&instr.op);
|
||||
if let Some(bar) = instr.deps.rd_bar() {
|
||||
min_start = max(min_start, bars[usize::from(bar)] + 2);
|
||||
}
|
||||
|
|
@ -372,13 +372,12 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 {
|
|||
// We don't know how it will be used but it may be used in
|
||||
// the next block so we need to at least assume the maximum
|
||||
// destination latency from the end of the block.
|
||||
let s = instr_latency(sm.sm(), &instr.op, i);
|
||||
let s = sm.worst_latency(&instr.op, i);
|
||||
min_start = max(min_start, s);
|
||||
}
|
||||
RegUse::Write((w_ip, w_dst_idx)) => {
|
||||
let s = instr_cycle[*w_ip]
|
||||
+ waw_latency(
|
||||
sm.sm(),
|
||||
+ sm.waw_latency(
|
||||
&instr.op,
|
||||
i,
|
||||
&b.instrs[*w_ip].op,
|
||||
|
|
@ -390,10 +389,9 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 {
|
|||
for (r_ip, r_src_idx) in reads {
|
||||
let c = instr_cycle[*r_ip];
|
||||
let s = if *r_src_idx == usize::MAX {
|
||||
c + paw_latency(sm.sm(), &instr.op, i)
|
||||
c + sm.paw_latency(&instr.op, i)
|
||||
} else {
|
||||
c + raw_latency(
|
||||
sm.sm(),
|
||||
c + sm.raw_latency(
|
||||
&instr.op,
|
||||
i,
|
||||
&b.instrs[*r_ip].op,
|
||||
|
|
@ -408,8 +406,7 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 {
|
|||
RegUse::None => (),
|
||||
RegUse::Write((w_ip, w_dst_idx)) => {
|
||||
let s = instr_cycle[*w_ip]
|
||||
+ war_latency(
|
||||
sm.sm(),
|
||||
+ sm.war_latency(
|
||||
&instr.op,
|
||||
i,
|
||||
&b.instrs[*w_ip].op,
|
||||
|
|
@ -458,7 +455,7 @@ fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u32 {
|
|||
if matches!(instr.op, Op::SrcBar(_)) {
|
||||
instr.op = Op::Nop(OpNop { label: None });
|
||||
MappedInstrs::One(instr)
|
||||
} else if exec_latency(sm.sm(), &instr.op) > 1 {
|
||||
} else if sm.exec_latency(&instr.op) > 1 {
|
||||
let mut nop = Instr::new_boxed(OpNop { label: None });
|
||||
nop.deps.set_delay(2);
|
||||
MappedInstrs::Many(vec![instr, nop])
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ use nak_bindings::*;
|
|||
|
||||
pub use crate::builder::{Builder, InstrBuilder, SSABuilder, SSAInstrBuilder};
|
||||
use crate::legalize::LegalizeBuilder;
|
||||
use crate::sched_common;
|
||||
use crate::sph::{OutputTopology, PixelImap};
|
||||
use compiler::as_slice::*;
|
||||
use compiler::cfg::CFG;
|
||||
|
|
@ -7002,14 +7003,6 @@ impl Instr {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns whether this instruction's op has a fixed latency on shader
/// model `sm`. Simply forwards to `has_fixed_latency` on `self.op`.
pub fn has_fixed_latency(&self, sm: u8) -> bool {
|
||||
self.op.has_fixed_latency(sm)
|
||||
}
|
||||
|
||||
/// Returns `true` exactly when `has_fixed_latency(sm)` is `false` for this
/// instruction, i.e. the two predicates are strict complements.
pub fn needs_scoreboard(&self, sm: u8) -> bool {
|
||||
!self.has_fixed_latency(sm)
|
||||
}
|
||||
|
||||
/// Returns `true` when this instruction's op is `Op::Bar` or `Op::BSync`,
/// and `false` for every other op.
pub fn needs_yield(&self) -> bool {
|
||||
matches!(&self.op, Op::Bar(_) | Op::BSync(_))
|
||||
}
|
||||
|
|
@ -7507,6 +7500,53 @@ pub trait ShaderModel {
|
|||
|
||||
fn op_can_be_uniform(&self, op: &Op) -> bool;
|
||||
|
||||
// Scheduling information
|
||||
/// Returns whether `op` needs a scoreboard on this shader model.
///
/// The provided implementation answers `true` exactly when
/// `op.has_fixed_latency(self.sm())` is `false`.
fn op_needs_scoreboard(&self, op: &Op) -> bool {
|
||||
!op.has_fixed_latency(self.sm())
|
||||
}
|
||||
|
||||
/// Returns the execution latency of `op` on this shader model.
///
/// The provided implementation forwards to `sched_common::exec_latency`,
/// passing `self.sm()` as the shader-model number.
fn exec_latency(&self, op: &Op) -> u32 {
|
||||
sched_common::exec_latency(self.sm(), op)
|
||||
}
|
||||
|
||||
/// Returns the latency between `write` producing its destination `dst_idx`
/// and `read` consuming it as source `src_idx` (presumably
/// "read-after-write", judging by the name and parameters).
///
/// The provided implementation forwards to `sched_common::raw_latency`,
/// passing `self.sm()` followed by the arguments in the same order.
fn raw_latency(
|
||||
&self,
|
||||
write: &Op,
|
||||
dst_idx: usize,
|
||||
read: &Op,
|
||||
src_idx: usize,
|
||||
) -> u32 {
|
||||
sched_common::raw_latency(self.sm(), write, dst_idx, read, src_idx)
|
||||
}
|
||||
|
||||
/// Returns the latency between `read` consuming source `src_idx` and
/// `write` overwriting it through destination `dst_idx` (presumably
/// "write-after-read", judging by the name and parameters).
///
/// The provided implementation forwards to `sched_common::war_latency`,
/// passing `self.sm()` followed by the arguments in the same order.
fn war_latency(
|
||||
&self,
|
||||
read: &Op,
|
||||
src_idx: usize,
|
||||
write: &Op,
|
||||
dst_idx: usize,
|
||||
) -> u32 {
|
||||
sched_common::war_latency(self.sm(), read, src_idx, write, dst_idx)
|
||||
}
|
||||
|
||||
/// Returns the latency between two ops, `a` and `b`, with respect to
/// destination `a_dst_idx` of `a` and destination `b_dst_idx` of `b`
/// (presumably "write-after-write", judging by the name).
///
/// The provided implementation forwards to `sched_common::waw_latency`,
/// passing `self.sm()` followed by the arguments in the same order.
// NOTE(review): which of `a`/`b` is the earlier write is not visible here;
// confirm against `sched_common::waw_latency` before relying on the order.
fn waw_latency(
|
||||
&self,
|
||||
a: &Op,
|
||||
a_dst_idx: usize,
|
||||
b: &Op,
|
||||
b_dst_idx: usize,
|
||||
) -> u32 {
|
||||
sched_common::waw_latency(self.sm(), a, a_dst_idx, b, b_dst_idx)
|
||||
}
|
||||
|
||||
/// Returns the latency associated with destination `dst_idx` of `write`
/// for the "paw" case.
///
/// The provided implementation forwards to `sched_common::paw_latency`,
/// passing `self.sm()` as the shader-model number.
// NOTE(review): "paw" presumably means predicate-after-write; the callers
// in this change use it when the recorded reader source index is
// `usize::MAX` — confirm the exact meaning in `sched_common`.
fn paw_latency(&self, write: &Op, dst_idx: usize) -> u32 {
|
||||
sched_common::paw_latency(self.sm(), write, dst_idx)
|
||||
}
|
||||
|
||||
/// Returns a latency for destination `dst_idx` of `write` that does not
/// depend on any particular reader.
///
/// The provided implementation forwards to `sched_common::instr_latency`,
/// passing `self.sm()` as the shader-model number.
// NOTE(review): presumably an upper bound over all possible consumers (the
// name and the "maximum destination latency" comment at the call site in
// `calc_delays` suggest so) — confirm against `sched_common::instr_latency`.
fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
|
||||
sched_common::instr_latency(self.sm(), write, dst_idx)
|
||||
}
|
||||
|
||||
fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op);
|
||||
fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32>;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,10 +3,7 @@
|
|||
|
||||
use crate::ir::*;
|
||||
use crate::opt_instr_sched_common::*;
|
||||
use crate::sched_common::{
|
||||
exec_latency, instr_latency, paw_latency, raw_latency, war_latency,
|
||||
waw_latency, RegTracker,
|
||||
};
|
||||
use crate::sched_common::RegTracker;
|
||||
use std::cmp::max;
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
|
|
@ -74,29 +71,18 @@ fn generate_dep_graph(
|
|||
|
||||
uses.for_each_instr_dst_mut(instr, |i, u| {
|
||||
if let Some((w_ip, w_dst_idx)) = u.write {
|
||||
let latency = waw_latency(
|
||||
sm.sm(),
|
||||
&instr.op,
|
||||
i,
|
||||
&instrs[w_ip].op,
|
||||
w_dst_idx,
|
||||
);
|
||||
let latency =
|
||||
sm.waw_latency(&instr.op, i, &instrs[w_ip].op, w_dst_idx);
|
||||
g.add_edge(ip, w_ip, EdgeLabel { latency });
|
||||
}
|
||||
|
||||
for &(r_ip, r_src_idx) in &u.reads {
|
||||
let mut latency = if r_src_idx == usize::MAX {
|
||||
paw_latency(sm.sm(), &instr.op, i)
|
||||
sm.paw_latency(&instr.op, i)
|
||||
} else {
|
||||
raw_latency(
|
||||
sm.sm(),
|
||||
&instr.op,
|
||||
i,
|
||||
&instrs[r_ip].op,
|
||||
r_src_idx,
|
||||
)
|
||||
sm.raw_latency(&instr.op, i, &instrs[r_ip].op, r_src_idx)
|
||||
};
|
||||
if instr.needs_scoreboard(sm.sm()) {
|
||||
if sm.op_needs_scoreboard(&instr.op) {
|
||||
latency = max(
|
||||
latency,
|
||||
estimate_variable_latency(sm.sm(), &instr.op),
|
||||
|
|
@ -107,13 +93,8 @@ fn generate_dep_graph(
|
|||
});
|
||||
uses.for_each_instr_src_mut(instr, |i, u| {
|
||||
if let Some((w_ip, w_dst_idx)) = u.write {
|
||||
let latency = war_latency(
|
||||
sm.sm(),
|
||||
&instr.op,
|
||||
i,
|
||||
&instrs[w_ip].op,
|
||||
w_dst_idx,
|
||||
);
|
||||
let latency =
|
||||
sm.war_latency(&instr.op, i, &instrs[w_ip].op, w_dst_idx);
|
||||
g.add_edge(ip, w_ip, EdgeLabel { latency });
|
||||
}
|
||||
});
|
||||
|
|
@ -131,16 +112,16 @@ fn generate_dep_graph(
|
|||
|
||||
// Initialize this node's distance to the end
|
||||
let mut ready_cycle = (0..instr.dsts().len())
|
||||
.map(|i| instr_latency(sm.sm(), &instr.op, i))
|
||||
.map(|i| sm.worst_latency(&instr.op, i))
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
if instr.needs_scoreboard(sm.sm()) {
|
||||
if sm.op_needs_scoreboard(&instr.op) {
|
||||
let var_latency = estimate_variable_latency(sm.sm(), &instr.op)
|
||||
+ exec_latency(sm.sm(), &instrs[instrs.len() - 1].op);
|
||||
+ sm.exec_latency(&instrs[instrs.len() - 1].op);
|
||||
ready_cycle = max(ready_cycle, var_latency);
|
||||
}
|
||||
let label = &mut g.nodes[ip].label;
|
||||
label.exec_latency = exec_latency(sm.sm(), &instr.op);
|
||||
label.exec_latency = sm.exec_latency(&instr.op);
|
||||
label.ready_cycle = ready_cycle;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue