diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 4663ea46374..7086f9693f5 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -5,6 +5,7 @@
 
 mod bitset;
 mod nak_assign_regs;
+mod nak_calc_instr_deps;
 mod nak_encode_tu102;
 mod nak_from_nir;
 mod nak_ir;
@@ -259,6 +260,7 @@ pub extern "C" fn nak_compile_shader(
 
     s.assign_regs_trivial();
     s.lower_vec_split();
+    s.calc_instr_deps();
     s.lower_zero_to_gpr255();
 
     println!("NAK IR:\n{}", &s);
diff --git a/src/nouveau/compiler/nak_calc_instr_deps.rs b/src/nouveau/compiler/nak_calc_instr_deps.rs
new file mode 100644
index 00000000000..2b2a3a2c913
--- /dev/null
+++ b/src/nouveau/compiler/nak_calc_instr_deps.rs
@@ -0,0 +1,250 @@
+/*
+ * Copyright © 2022 Collabora, Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#![allow(unstable_name_collisions)]
+
+use crate::nak_ir::*;
+use crate::util::NextMultipleOf;
+
+use std::cmp::max;
+use std::ops::Range;
+
+struct RegTracker<T> {
+    reg: [T; 255],
+    ureg: [T; 63],
+    pred: [T; 6],
+    upred: [T; 6],
+}
+
+impl<T: Copy> RegTracker<T> {
+    pub fn new(v: T) -> Self {
+        Self {
+            reg: [v; 255],
+            ureg: [v; 63],
+            pred: [v; 6],
+            upred: [v; 6],
+        }
+    }
+
+    fn get(&self, reg: &RegRef) -> &[T] {
+        let range = reg.idx_range();
+        let range = Range {
+            start: usize::from(range.start),
+            end: usize::from(range.end),
+        };
+
+        match reg.file() {
+            RegFile::GPR => &self.reg[range],
+            RegFile::UGPR => &self.ureg[range],
+            RegFile::Pred => &self.pred[range],
+            RegFile::UPred => &self.upred[range],
+        }
+    }
+
+    fn get_mut(&mut self, reg: &RegRef) -> &mut [T] {
+        let range = reg.idx_range();
+        let range = Range {
+            start: usize::from(range.start),
+            end: usize::from(range.end),
+        };
+
+        match reg.file() {
+            RegFile::GPR => &mut self.reg[range],
+            RegFile::UGPR => &mut self.ureg[range],
+            RegFile::Pred => &mut self.pred[range],
+            RegFile::UPred => &mut self.upred[range],
+        }
+    }
+}
+
+struct AllocBarriers {
+    active: u8,
+    tracker: RegTracker<i8>,
+}
+
+impl AllocBarriers {
+    pub fn new() -> AllocBarriers {
+        AllocBarriers {
+            active: 0,
+            tracker: RegTracker::new(-1),
+        }
+    }
+
+    fn alloc_barrier(&mut self) -> i8 {
+        let bar = self.active.trailing_ones();
+        assert!(bar < 6);
+        self.active |= 1 << bar;
+        bar.try_into().unwrap()
+    }
+
+    fn free_barrier(&mut self, bar: i8) {
+        self.free_barrier_mask(1 << bar);
+    }
+
+    fn free_barrier_mask(&mut self, bar_mask: u8) {
+        //assert!(bar_mask < (1 << 7));
+        //assert!((bar_mask & !self.active) == 0);
+        //self.active &= !bar_mask;
+    }
+
+    fn reg_barrier_mask(&self, reg: &RegRef) -> u8 {
+        self.tracker
+            .get(reg)
+            .iter()
+            .map(|i| if i < &0 { 0 } else { 1_u8 << i })
+            .reduce(|a, x| a | x)
+            .unwrap_or(0)
+            & self.active
+    }
+
+    fn set_reg_barrier(&mut self, reg: &RegRef, bar: i8) {
+        for b in self.tracker.get_mut(reg) {
+            *b = bar;
+        }
+    }
+
+    fn instr_read_barrier_mask(&self, instr: &Instr) -> u8 {
+        let mut bar_mask = 0_u8;
+        for src in instr.srcs() {
+            if let Some(reg) = src.get_reg() {
+                bar_mask |= self.reg_barrier_mask(reg);
+            }
+        }
+        bar_mask
+    }
+
+    fn set_instr_read_barrier(&mut self, instr: &Instr, bar: i8) {
+        for src in instr.srcs() {
+            if let Some(reg) = src.get_reg() {
+                self.set_reg_barrier(reg, bar);
+            }
+        }
+    }
+
+    fn instr_write_barrier_mask(&self, instr: &Instr) -> u8 {
+        let mut bar_mask = 0_u8;
+        for dst in instr.dsts() {
+            if let Some(reg) = dst.as_reg() {
+                bar_mask |= self.reg_barrier_mask(reg);
+            }
+        }
+        bar_mask
+    }
+
+    fn set_instr_write_barrier(&mut self, instr: &Instr, bar: i8) {
+        for dst in instr.dsts() {
+            if let Some(reg) = dst.as_reg() {
+                self.set_reg_barrier(reg, bar);
+            }
+        }
+    }
+
+    pub fn alloc_barriers(&mut self, s: &mut Shader) {
+        for f in &mut s.functions {
+            for b in &mut f.blocks.iter_mut() {
+                for instr in &mut b.instrs.iter_mut() {
+                    /* TODO: Don't barrier read-after-read */
+                    let wait = self.instr_read_barrier_mask(instr)
+                        | self.instr_write_barrier_mask(instr);
+                    instr.deps.add_wt_bar_mask(wait);
+                    self.free_barrier_mask(wait);
+
+                    if instr.get_latency().is_some() {
+                        continue;
+                    }
+
+                    if instr.num_srcs() > 0 {
+                        let bar = self.alloc_barrier();
+                        instr.deps.set_rd_bar(bar.try_into().unwrap());
+                        self.set_instr_read_barrier(instr, bar);
+                    }
+                    if instr.num_dsts() > 0 {
+                        let bar = self.alloc_barrier();
+                        instr.deps.set_wr_bar(bar.try_into().unwrap());
+                        self.set_instr_write_barrier(instr, bar);
+                    }
+                }
+            }
+        }
+    }
+}
+
+struct CalcDelay {
+    cycle: u32,
+    ready: RegTracker<u32>,
+}
+
+impl CalcDelay {
+    pub fn new() -> CalcDelay {
+        CalcDelay {
+            cycle: 0,
+            ready: RegTracker::new(0),
+        }
+    }
+
+    fn set_reg_ready(&mut self, reg: &RegRef, ready: u32) {
+        for r in self.ready.get_mut(reg) {
+            assert!(*r <= ready);
+            *r = ready;
+        }
+    }
+
+    fn reg_ready(&self, reg: &RegRef) -> u32 {
+        *self.ready.get(reg).iter().max().unwrap_or(&0_u32)
+    }
+
+    fn instr_dsts_ready(&self, instr: &Instr) -> u32 {
+        instr
+            .dsts()
+            .iter()
+            .map(|dst| match dst {
+                Dst::Zero => 0,
+                Dst::Reg(reg) => self.reg_ready(reg),
+                _ => panic!("Should be run after RA"),
+            })
+            .max()
+            .unwrap_or(0)
+    }
+
+    fn set_instr_ready(&mut self, instr: &Instr, ready: u32) {
+        for src in instr.srcs() {
+            if let Some(reg) = src.get_reg() {
+                self.set_reg_ready(reg, ready);
+            }
+        }
+    }
+
+    fn calc_instr_delay(&mut self, instr: &mut Instr) {
+        let mut ready = self.cycle + 1; /* TODO: co-issue */
+        if let Some(latency) = instr.get_latency() {
+            ready = max(ready, self.instr_dsts_ready(instr) + latency);
+        }
+
+        self.set_instr_ready(instr, ready);
+
+        let delay = ready - self.cycle;
+        let delay = delay.clamp(MIN_INSTR_DELAY.into(), MAX_INSTR_DELAY.into());
+        instr.deps.set_delay(u8::try_from(delay).unwrap());
+
+        self.cycle = ready;
+    }
+
+    pub fn calc_delay(&mut self, s: &mut Shader) {
+        for f in &mut s.functions {
+            for b in &mut f.blocks.iter_mut().rev() {
+                for instr in &mut b.instrs.iter_mut().rev() {
+                    self.calc_instr_delay(instr);
+                }
+            }
+        }
+    }
+}
+
+impl Shader {
+    pub fn calc_instr_deps(&mut self) {
+        AllocBarriers::new().alloc_barriers(self);
+        CalcDelay::new().calc_delay(self);
+    }
+}
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index e4747e4daed..9ab969cd0af 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -508,8 +508,8 @@ impl fmt::Display for MemAccess {
     }
 }
 
-const MIN_INSTR_DELAY: u8 = 1;
-const MAX_INSTR_DELAY: u8 = 15;
+pub const MIN_INSTR_DELAY: u8 = 1;
+pub const MAX_INSTR_DELAY: u8 = 15;
 
 pub struct InstrDeps {
     pub delay: u8,
@@ -660,13 +660,7 @@ impl Instr {
             out_load: false,
             flags: 0,
         };
-        let mut instr = Instr::new(
-            Opcode::ALD(attr),
-            slice::from_ref(&dst),
-            &[vtx, offset],
-        );
-        instr.deps.set_wr_bar(0);
-        instr
+        Instr::new(Opcode::ALD(attr), slice::from_ref(&dst), &[vtx, offset])
     }
 
     pub fn new_ast(attr_addr: u16, data: Src, vtx: Src, offset: Src) -> Instr {
@@ -677,12 +671,7 @@ impl Instr {
             out_load: false,
             flags: 0,
         };
-        let mut instr =
-            Instr::new(Opcode::AST(attr), &[], &[data, vtx, offset]);
-        instr.deps.set_delay(2);
-        instr.deps.set_rd_bar(0);
-        instr.deps.add_wt_bar(0);
-        instr
+        Instr::new(Opcode::AST(attr), &[], &[data, vtx, offset])
    }
 
     pub fn new_ld(dst: Dst, access: MemAccess, addr: Src) -> Instr {
@@ -757,6 +746,29 @@ impl Instr {
             _ => true,
         }
     }
+
+    pub fn get_latency(&self) -> Option<u32> {
+        match self.op {
+            Opcode::FADD
+            | Opcode::FFMA
+            | Opcode::FMNMX
+            | Opcode::FMUL
+            | Opcode::IADD3
+            | Opcode::SHL => Some(6),
+            Opcode::MOV => Some(15),
+            Opcode::S2R(_) => None,
+            Opcode::ALD(_) => None,
+            Opcode::AST(_) => Some(15),
+            Opcode::LD(_) => None,
+            Opcode::ST(_) => None,
+            Opcode::EXIT => Some(15),
+            Opcode::NOOP
+            | Opcode::META(_)
+            | Opcode::VEC
+            | Opcode::SPLIT
+            | Opcode::FS_OUT => panic!("Not a hardware opcode"),
+        }
+    }
 }
 
 pub struct MetaInstr {