mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
nak: Add a prepass instruction scheduler
Totals:
  CodeSize: 5750619392 -> 5817868528 (+1.17%); split: -0.32%, +1.49%
  Number of GPRs: 16276896 -> 16342962 (+0.41%); split: -1.00%, +1.41%
  SLM Size: 8927212 -> 8739732 (-2.10%); split: -2.59%, +0.49%
  Static cycle count: 1497053946 -> 1412275595 (-5.66%); split: -6.00%, +0.33%
  Spills to memory: 14248182 -> 14157708 (-0.63%); split: -1.25%, +0.62%
  Fills from memory: 14248182 -> 14157708 (-0.63%); split: -1.25%, +0.62%
  Spills to reg: 9143000 -> 9042885 (-1.09%); split: -1.22%, +0.13%
  Fills from reg: 6892354 -> 6808724 (-1.21%); split: -1.33%, +0.12%
  Max warps/SM: 6482016 -> 6567500 (+1.32%); split: +1.40%, -0.08%

Totals from 189431 (96.40% of 196502) affected shaders:
  CodeSize: 5739697280 -> 5806946416 (+1.17%); split: -0.32%, +1.50%
  Number of GPRs: 16114477 -> 16180543 (+0.41%); split: -1.01%, +1.42%
  SLM Size: 8927180 -> 8739700 (-2.10%); split: -2.59%, +0.49%
  Static cycle count: 1495006918 -> 1410228567 (-5.67%); split: -6.00%, +0.33%
  Spills to memory: 14248182 -> 14157708 (-0.63%); split: -1.25%, +0.62%
  Fills from memory: 14248182 -> 14157708 (-0.63%); split: -1.25%, +0.62%
  Spills to reg: 9141040 -> 9040925 (-1.10%); split: -1.23%, +0.13%
  Fills from reg: 6890401 -> 6806771 (-1.21%); split: -1.34%, +0.12%
  Max warps/SM: 6149140 -> 6234624 (+1.39%); split: +1.47%, -0.08%

Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33306>
This commit is contained in:
parent
5caee114ec
commit
b55b8da012
5 changed files with 1202 additions and 6 deletions
|
|
@ -450,6 +450,7 @@ fn nak_compile_shader_internal(
|
||||||
pass!(s, opt_out);
|
pass!(s, opt_out);
|
||||||
pass!(s, legalize);
|
pass!(s, legalize);
|
||||||
pass!(s, opt_dce);
|
pass!(s, opt_dce);
|
||||||
|
pass!(s, opt_instr_sched_prepass);
|
||||||
pass!(s, assign_regs);
|
pass!(s, assign_regs);
|
||||||
pass!(s, lower_par_copies);
|
pass!(s, lower_par_copies);
|
||||||
pass!(s, lower_copy_swap);
|
pass!(s, lower_copy_swap);
|
||||||
|
|
|
||||||
|
|
@ -327,7 +327,7 @@ impl Iterator for RegFileSet {
|
||||||
///
|
///
|
||||||
/// This is used by several passes which need to replicate a data structure
|
/// This is used by several passes which need to replicate a data structure
|
||||||
/// per-register-file.
|
/// per-register-file.
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
pub struct PerRegFile<T> {
|
pub struct PerRegFile<T> {
|
||||||
per_file: [T; NUM_REG_FILES],
|
per_file: [T; NUM_REG_FILES],
|
||||||
}
|
}
|
||||||
|
|
@ -8264,6 +8264,167 @@ impl Op {
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether this op is classified as "virtual".
///
/// Every op in the "Virtual ops" group at the end of the match (`Undef`,
/// `SrcBar`, `PhiSrcs`, `PhiDsts`, `Copy`, `Pin`, `Unpin`, `Swap`, `ParCopy`,
/// `RegOut`, `Annotate`) yields `true`.  `R2UR` yields `true` only when its
/// source is uniform or its destination is in `RegFile::UPred`.  Every other
/// op yields `false`.
///
/// The match has no wildcard arm, so each `Op` variant has to be classified
/// here explicitly.
///
/// NOTE(review): "virtual" presumably means the op does not correspond to a
/// hardware instruction of its own -- confirm against the prepass scheduler
/// that consumes this.
pub fn is_virtual(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
// Float ALU
|
||||||
|
Op::F2FP(_)
|
||||||
|
| Op::FAdd(_)
|
||||||
|
| Op::FFma(_)
|
||||||
|
| Op::FMnMx(_)
|
||||||
|
| Op::FMul(_)
|
||||||
|
| Op::FSet(_)
|
||||||
|
| Op::FSetP(_)
|
||||||
|
| Op::HAdd2(_)
|
||||||
|
| Op::HFma2(_)
|
||||||
|
| Op::HMul2(_)
|
||||||
|
| Op::HSet2(_)
|
||||||
|
| Op::HSetP2(_)
|
||||||
|
| Op::HMnMx2(_)
|
||||||
|
| Op::FSwz(_)
|
||||||
|
| Op::FSwzAdd(_) => false,
|
||||||
|
|
||||||
|
// Multi-function unit
|
||||||
|
Op::Rro(_) | Op::MuFu(_) => false,
|
||||||
|
|
||||||
|
// Double-precision float ALU
|
||||||
|
Op::DAdd(_)
|
||||||
|
| Op::DFma(_)
|
||||||
|
| Op::DMnMx(_)
|
||||||
|
| Op::DMul(_)
|
||||||
|
| Op::DSetP(_) => false,
|
||||||
|
|
||||||
|
// Matrix Multiply Add
|
||||||
|
Op::Imma(_) | Op::Hmma(_) | Op::Ldsm(_) | Op::Movm(_) => false,
|
||||||
|
|
||||||
|
// Integer ALU
|
||||||
|
Op::BRev(_)
|
||||||
|
| Op::Flo(_)
|
||||||
|
| Op::PopC(_)
|
||||||
|
| Op::IMad(_)
|
||||||
|
| Op::IMul(_)
|
||||||
|
| Op::BMsk(_)
|
||||||
|
| Op::IAbs(_)
|
||||||
|
| Op::IAdd2(_)
|
||||||
|
| Op::IAdd2X(_)
|
||||||
|
| Op::IAdd3(_)
|
||||||
|
| Op::IAdd3X(_)
|
||||||
|
| Op::IDp4(_)
|
||||||
|
| Op::IMad64(_)
|
||||||
|
| Op::IMnMx(_)
|
||||||
|
| Op::ISetP(_)
|
||||||
|
| Op::Lea(_)
|
||||||
|
| Op::LeaX(_)
|
||||||
|
| Op::Lop2(_)
|
||||||
|
| Op::Lop3(_)
|
||||||
|
| Op::SuClamp(_)
|
||||||
|
| Op::SuBfm(_)
|
||||||
|
| Op::SuEau(_)
|
||||||
|
| Op::IMadSp(_)
|
||||||
|
| Op::Shf(_)
|
||||||
|
| Op::Shl(_)
|
||||||
|
| Op::Shr(_)
|
||||||
|
| Op::Bfe(_) => false,
|
||||||
|
|
||||||
|
// Conversions
|
||||||
|
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move ops
|
||||||
|
Op::Mov(_)
|
||||||
|
| Op::Prmt(_)
|
||||||
|
| Op::Sel(_)
|
||||||
|
| Op::Sgxt(_)
|
||||||
|
| Op::Shfl(_) => false,
|
||||||
|
|
||||||
|
// Predicate ops
|
||||||
|
Op::PLop3(_) | Op::PSetP(_) => false,
|
||||||
|
|
||||||
|
// Uniform ops
|
||||||
|
Op::R2UR(op) => {
|
||||||
|
op.src.is_uniform() || op.dst.file() == Some(RegFile::UPred)
|
||||||
|
}
|
||||||
|
Op::Redux(_) => false,
|
||||||
|
|
||||||
|
// Texture ops
|
||||||
|
Op::Tex(_)
|
||||||
|
| Op::Tld(_)
|
||||||
|
| Op::Tld4(_)
|
||||||
|
| Op::Tmml(_)
|
||||||
|
| Op::Txd(_)
|
||||||
|
| Op::Txq(_) => false,
|
||||||
|
|
||||||
|
// Surface ops
|
||||||
|
Op::SuLd(_)
|
||||||
|
| Op::SuSt(_)
|
||||||
|
| Op::SuAtom(_)
|
||||||
|
| Op::SuLdGa(_)
|
||||||
|
| Op::SuStGa(_) => false,
|
||||||
|
|
||||||
|
// Memory ops
|
||||||
|
Op::Ld(_)
|
||||||
|
| Op::Ldc(_)
|
||||||
|
| Op::LdSharedLock(_)
|
||||||
|
| Op::St(_)
|
||||||
|
| Op::StSCheckUnlock(_)
|
||||||
|
| Op::Atom(_)
|
||||||
|
| Op::AL2P(_)
|
||||||
|
| Op::ALd(_)
|
||||||
|
| Op::ASt(_)
|
||||||
|
| Op::Ipa(_)
|
||||||
|
| Op::CCtl(_)
|
||||||
|
| Op::LdTram(_)
|
||||||
|
| Op::MemBar(_) => false,
|
||||||
|
|
||||||
|
// Control-flow ops
|
||||||
|
Op::BClear(_)
|
||||||
|
| Op::Break(_)
|
||||||
|
| Op::BSSy(_)
|
||||||
|
| Op::BSync(_)
|
||||||
|
| Op::SSy(_)
|
||||||
|
| Op::Sync(_)
|
||||||
|
| Op::Brk(_)
|
||||||
|
| Op::PBk(_)
|
||||||
|
| Op::Cont(_)
|
||||||
|
| Op::PCnt(_)
|
||||||
|
| Op::Bra(_)
|
||||||
|
| Op::Exit(_)
|
||||||
|
| Op::WarpSync(_) => false,
|
||||||
|
|
||||||
|
// Barrier
|
||||||
|
Op::BMov(_) => false,
|
||||||
|
|
||||||
|
// Geometry ops
|
||||||
|
Op::Out(_) | Op::OutFinal(_) => false,
|
||||||
|
|
||||||
|
// Miscellaneous ops
|
||||||
|
Op::Bar(_)
|
||||||
|
| Op::TexDepBar(_)
|
||||||
|
| Op::CS2R(_)
|
||||||
|
| Op::Isberd(_)
|
||||||
|
| Op::ViLd(_)
|
||||||
|
| Op::Kill(_)
|
||||||
|
| Op::PixLd(_)
|
||||||
|
| Op::S2R(_)
|
||||||
|
| Op::Match(_)
|
||||||
|
| Op::Nop(_)
|
||||||
|
| Op::Vote(_) => false,
|
||||||
|
|
||||||
|
// Virtual ops
|
||||||
|
Op::Undef(_)
|
||||||
|
| Op::SrcBar(_)
|
||||||
|
| Op::PhiSrcs(_)
|
||||||
|
| Op::PhiDsts(_)
|
||||||
|
| Op::Copy(_)
|
||||||
|
| Op::Pin(_)
|
||||||
|
| Op::Unpin(_)
|
||||||
|
| Op::Swap(_)
|
||||||
|
| Op::ParCopy(_)
|
||||||
|
| Op::RegOut(_)
|
||||||
|
| Op::Annotate(_) => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||||
|
|
@ -8523,6 +8684,13 @@ impl Instr {
|
||||||
self.op.src_types()
|
self.op.src_types()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator over the SSA values used by this instruction.
///
/// Yields the SSA values of each source, in `srcs()` order, followed by the
/// SSA values of the predicate reference (`self.pred.pred_ref`).  The
/// returned references borrow from `self`.
///
/// NOTE(review): `for_each_ssa_use` starts with the predicate instead, so the
/// two may visit values in a different order -- confirm no caller depends on
/// the ordering.
pub fn ssa_uses(&self) -> impl Iterator<Item = &SSAValue> {
|
||||||
|
self.srcs()
|
||||||
|
.iter()
|
||||||
|
.flat_map(|src| src.iter_ssa())
|
||||||
|
.chain(self.pred.pred_ref.iter_ssa())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
|
pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
|
||||||
for ssa in self.pred.iter_ssa() {
|
for ssa in self.pred.iter_ssa() {
|
||||||
f(ssa);
|
f(ssa);
|
||||||
|
|
@ -9435,6 +9603,7 @@ pub fn max_warps_per_sm(sm: &ShaderModelInfo, gprs: u32) -> u32 {
|
||||||
// TODO: Take local_size and shared mem limit into account for compute
|
// TODO: Take local_size and shared mem limit into account for compute
|
||||||
let total_regs: u32 = 65536;
|
let total_regs: u32 = 65536;
|
||||||
// GPRs are allocated in multiples of 8
|
// GPRs are allocated in multiples of 8
|
||||||
|
let gprs = max(gprs, 1);
|
||||||
let gprs = gprs.next_multiple_of(8);
|
let gprs = gprs.next_multiple_of(8);
|
||||||
let max_warps = prev_multiple_of((total_regs / 32) / gprs, 4);
|
let max_warps = prev_multiple_of((total_regs / 32) / gprs, 4);
|
||||||
min(max_warps, sm.warps_per_sm.into())
|
min(max_warps, sm.warps_per_sm.into())
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ mod opt_crs;
|
||||||
mod opt_dce;
|
mod opt_dce;
|
||||||
mod opt_instr_sched_common;
|
mod opt_instr_sched_common;
|
||||||
mod opt_instr_sched_postpass;
|
mod opt_instr_sched_postpass;
|
||||||
|
mod opt_instr_sched_prepass;
|
||||||
mod opt_jump_thread;
|
mod opt_jump_thread;
|
||||||
mod opt_lop;
|
mod opt_lop;
|
||||||
mod opt_out;
|
mod opt_out;
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ use compiler::dataflow::BackwardDataflow;
|
||||||
use rustc_hash::{FxHashMap, FxHashSet};
|
use rustc_hash::{FxHashMap, FxHashSet};
|
||||||
use std::cmp::{max, min, Ord, Ordering};
|
use std::cmp::{max, min, Ord, Ordering};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone, Default)]
|
||||||
pub struct LiveSet {
|
pub struct LiveSet {
|
||||||
live: PerRegFile<u32>,
|
live: PerRegFile<u32>,
|
||||||
set: FxHashSet<SSAValue>,
|
set: FxHashSet<SSAValue>,
|
||||||
|
|
@ -16,10 +16,12 @@ pub struct LiveSet {
|
||||||
|
|
||||||
impl LiveSet {
|
impl LiveSet {
|
||||||
pub fn new() -> LiveSet {
|
pub fn new() -> LiveSet {
|
||||||
LiveSet {
|
Default::default()
|
||||||
live: Default::default(),
|
}
|
||||||
set: Default::default(),
|
|
||||||
}
|
pub fn clear(&mut self) {
|
||||||
|
self.live = Default::default();
|
||||||
|
self.set.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn contains(&self, ssa: &SSAValue) -> bool {
|
pub fn contains(&self, ssa: &SSAValue) -> bool {
|
||||||
|
|
|
||||||
1023
src/nouveau/compiler/nak/opt_instr_sched_prepass.rs
Normal file
1023
src/nouveau/compiler/nak/opt_instr_sched_prepass.rs
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue