diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs index 3016a760963..b791d9167e8 100644 --- a/src/nouveau/compiler/nak.rs +++ b/src/nouveau/compiler/nak.rs @@ -20,6 +20,7 @@ mod nak_opt_copy_prop; mod nak_opt_dce; mod nak_opt_lop; mod nak_repair_ssa; +mod nak_spill_values; mod nak_to_cssa; mod nir; mod util; diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 85bed4a1a70..30f08e9048b 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -97,6 +97,18 @@ impl RegFile { } } +impl fmt::Display for RegFile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RegFile::GPR => write!(f, "GPR"), + RegFile::UGPR => write!(f, "UGPR"), + RegFile::Pred => write!(f, "Pred"), + RegFile::UPred => write!(f, "UPred"), + RegFile::Mem => write!(f, "Mem"), + } + } +} + impl From for u8 { fn from(value: RegFile) -> u8 { value as u8 @@ -913,6 +925,22 @@ impl Src { } } + pub fn as_bool(&self) -> Option { + match self.src_ref { + SrcRef::True => Some(!self.src_mod.is_bnot()), + SrcRef::False => Some(self.src_mod.is_bnot()), + SrcRef::SSA(vec) => { + assert!(vec.is_predicate() && vec.comps() == 1); + None + } + SrcRef::Reg(reg) => { + assert!(reg.is_predicate() && reg.comps() == 1); + None + } + _ => panic!("Not a boolean source"), + } + } + pub fn get_reg(&self) -> Option<&RegRef> { self.src_ref.get_reg() } diff --git a/src/nouveau/compiler/nak_spill_values.rs b/src/nouveau/compiler/nak_spill_values.rs new file mode 100644 index 00000000000..5ce2909dadf --- /dev/null +++ b/src/nouveau/compiler/nak_spill_values.rs @@ -0,0 +1,793 @@ +// Copyright © 2023 Collabora, Ltd. 
+// SPDX-License-Identifier: MIT + +#![allow(unstable_name_collisions)] + +use crate::bitset::BitSet; +use crate::nak_ir::*; +use crate::nak_liveness::{ + BlockLiveness, LiveSet, Liveness, NextUseBlockLiveness, NextUseLiveness, +}; +use crate::{GetDebugFlags, DEBUG}; + +use std::cell::RefCell; +use std::cmp::{max, Ordering, Reverse}; +use std::collections::{BinaryHeap, HashMap, HashSet}; + +struct PhiDstMap { + ssa_phi: HashMap, +} + +impl PhiDstMap { + fn new() -> PhiDstMap { + PhiDstMap { + ssa_phi: HashMap::new(), + } + } + + fn add_phi_dst(&mut self, phi_idx: u32, dst: Dst) { + let vec = dst.as_ssa().expect("Not an SSA destination"); + debug_assert!(vec.comps() == 1); + self.ssa_phi.insert(vec[0], phi_idx); + } + + pub fn from_block(block: &BasicBlock) -> PhiDstMap { + let mut map = PhiDstMap::new(); + if let Some(phi) = block.phi_dsts() { + for (idx, dst) in phi.dsts.iter() { + map.add_phi_dst(*idx, *dst); + } + } + map + } + + fn get_phi_idx(&self, ssa: &SSAValue) -> Option<&u32> { + self.ssa_phi.get(ssa) + } +} + +struct PhiSrcMap { + phi_src: HashMap, +} + +impl PhiSrcMap { + fn new() -> PhiSrcMap { + PhiSrcMap { + phi_src: HashMap::new(), + } + } + + fn add_phi_src(&mut self, phi_idx: u32, src: Src) { + debug_assert!(src.src_mod.is_none()); + let vec = src.src_ref.as_ssa().expect("Not an SSA source"); + debug_assert!(vec.comps() == 1); + self.phi_src.insert(phi_idx, vec[0]); + } + + pub fn from_block(block: &BasicBlock) -> PhiSrcMap { + let mut map = PhiSrcMap::new(); + if let Some(phi) = block.phi_srcs() { + for (idx, src) in phi.srcs.iter() { + map.add_phi_src(*idx, *src); + } + } + map + } + + pub fn get_src_ssa(&self, phi_idx: &u32) -> &SSAValue { + self.phi_src.get(phi_idx).expect("Phi source missing") + } +} + +trait Spill { + fn spill_file(&self, file: RegFile) -> RegFile; + fn spill(&self, dst: SSAValue, src: Src) -> Box; + fn fill(&self, dst: Dst, src: SSAValue) -> Box; +} + +struct SpillPred {} + +impl SpillPred { + fn new() -> Self { + Self {} + 
} +} + +impl Spill for SpillPred { + fn spill_file(&self, file: RegFile) -> RegFile { + match file { + RegFile::Pred => RegFile::GPR, + RegFile::UPred => RegFile::UGPR, + _ => panic!("Unsupported register file"), + } + } + + fn spill(&self, dst: SSAValue, src: Src) -> Box { + assert!(dst.file() == RegFile::GPR); + if let Some(b) = src.as_bool() { + let u32_src = if b { + Src::new_imm_u32(!0) + } else { + Src::new_zero() + }; + Instr::new_boxed(OpCopy { + dst: dst.into(), + src: u32_src, + }) + } else { + Instr::new_boxed(OpSel { + dst: dst.into(), + cond: src.bnot(), + srcs: [Src::new_zero(), Src::new_imm_u32(!0)], + }) + } + } + + fn fill(&self, dst: Dst, src: SSAValue) -> Box { + assert!(src.file() == RegFile::GPR); + Instr::new_boxed(OpISetP { + dst: dst, + set_op: PredSetOp::And, + cmp_op: IntCmpOp::Ne, + cmp_type: IntCmpType::U32, + srcs: [src.into(), Src::new_zero()], + accum: SrcRef::True.into(), + }) + } +} + +struct SpillGPR {} + +impl SpillGPR { + fn new() -> Self { + Self {} + } +} + +impl Spill for SpillGPR { + fn spill_file(&self, file: RegFile) -> RegFile { + assert!(file == RegFile::GPR); + RegFile::Mem + } + + fn spill(&self, dst: SSAValue, src: Src) -> Box { + assert!(dst.file() == RegFile::Mem); + Instr::new_boxed(OpCopy { + dst: dst.into(), + src: src, + }) + } + + fn fill(&self, dst: Dst, src: SSAValue) -> Box { + assert!(src.file() == RegFile::Mem); + Instr::new_boxed(OpCopy { + dst: dst, + src: src.into(), + }) + } +} + +#[derive(Eq, PartialEq)] +struct SSANextUse { + ssa: SSAValue, + next_use: usize, +} + +impl SSANextUse { + fn new(ssa: SSAValue, next_use: usize) -> SSANextUse { + SSANextUse { + ssa: ssa, + next_use: next_use, + } + } +} + +impl Ord for SSANextUse { + fn cmp(&self, other: &Self) -> Ordering { + self.next_use + .cmp(&other.next_use) + .then_with(|| self.ssa.idx().cmp(&other.ssa.idx())) + } +} + +impl PartialOrd for SSANextUse { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +struct 
SpillCache<'a, S: Spill> { + alloc: &'a mut SSAValueAllocator, + spill: S, + val_spill: HashMap, +} + +impl<'a, S: Spill> SpillCache<'a, S> { + fn new(alloc: &'a mut SSAValueAllocator, spill: S) -> SpillCache<'a, S> { + SpillCache { + alloc: alloc, + spill: spill, + val_spill: HashMap::new(), + } + } + + fn get_spill(&mut self, ssa: SSAValue) -> SSAValue { + *self.val_spill.entry(ssa).or_insert_with(|| { + self.alloc.alloc(self.spill.spill_file(ssa.file())) + }) + } + + fn spill_src(&mut self, ssa: SSAValue, src: Src) -> Box { + let dst = self.get_spill(ssa); + self.spill.spill(dst, src) + } + + fn spill(&mut self, ssa: SSAValue) -> Box { + self.spill_src(ssa, ssa.into()) + } + + fn fill_dst(&mut self, dst: Dst, ssa: SSAValue) -> Box { + let src = self.get_spill(ssa); + self.spill.fill(dst, src) + } + + fn fill(&mut self, ssa: SSAValue) -> Box { + self.fill_dst(ssa.into(), ssa) + } +} + +struct SpillChooser<'a> { + bl: &'a NextUseBlockLiveness, + ip: usize, + count: usize, + spills: BinaryHeap>, + min_next_use: usize, +} + +struct SpillChoiceIter { + spills: BinaryHeap>, +} + +impl<'a> SpillChooser<'a> { + pub fn new(bl: &'a NextUseBlockLiveness, ip: usize, count: usize) -> Self { + Self { + bl: bl, + ip: ip, + count: count, + spills: BinaryHeap::new(), + min_next_use: ip + 1, + } + } + + pub fn add_candidate(&mut self, ssa: SSAValue) { + let next_use = self.bl.next_use_after_or_at_ip(&ssa, self.ip).unwrap(); + + // Ignore anything used sooner than spill options we've already + // rejected. + if next_use < self.min_next_use { + return; + } + + self.spills.push(Reverse(SSANextUse::new(ssa, next_use))); + + if self.spills.len() > self.count { + /* Because we reversed the heap, pop actually removes the + * one with the lowest next_use which is what we want here.
+ */ + let old = self.spills.pop().unwrap(); + debug_assert!(self.spills.len() == self.count); + self.min_next_use = max(self.min_next_use, old.0.next_use); + } + } +} + +impl<'a> IntoIterator for SpillChooser<'a> { + type Item = SSAValue; + type IntoIter = SpillChoiceIter; + + fn into_iter(self) -> SpillChoiceIter { + SpillChoiceIter { + spills: self.spills, + } + } +} + +impl Iterator for SpillChoiceIter { + type Item = SSAValue; + + fn size_hint(&self) -> (usize, Option) { + let len = self.spills.len(); + (len, Some(len)) + } + + fn next(&mut self) -> Option { + self.spills.pop().map(|x| x.0.ssa) + } +} + +#[derive(Clone)] +struct SSAState { + // The set of variables which currently exist in registers + w: LiveSet, + // The set of variables which have already been spilled. These don't need + // to be spilled again. + s: HashSet, +} + +fn spill_values( + func: &mut Function, + file: RegFile, + limit: u32, + spill: S, +) { + let files = RegFileSet::from_iter([file]); + let live = NextUseLiveness::for_function(func, &files); + let blocks = &mut func.blocks; + + // Record the set of SSA values used within each loop + let mut phi_dst_maps = Vec::new(); + let mut phi_src_maps = Vec::new(); + let mut loop_uses = HashMap::new(); + for b_idx in 0..blocks.len() { + phi_dst_maps.push(PhiDstMap::from_block(&blocks[b_idx])); + phi_src_maps.push(PhiSrcMap::from_block(&blocks[b_idx])); + + if let Some(lh_idx) = blocks.loop_header_index(b_idx) { + let uses = loop_uses + .entry(lh_idx) + .or_insert_with(|| RefCell::new(HashSet::new())); + let uses = uses.get_mut(); + + for instr in &blocks[b_idx].instrs { + instr.for_each_ssa_use(|ssa| { + if ssa.file() == file { + uses.insert(*ssa); + } + }); + } + } + } + + if !loop_uses.is_empty() { + // The previous loop only added values to the uses set for the + // inner-most loop. Propagate from inner loops to outer loops. 
+ for b_idx in (0..blocks.len()).rev() { + let Some(uses) = loop_uses.get(&b_idx) else { + continue; + }; + let uses = uses.borrow(); + + let Some(dom) = blocks.dom_parent_index(b_idx) else { + continue; + }; + + let Some(dom_lh_idx) = blocks.loop_header_index(dom) else { + continue; + }; + + let mut parent_uses = + loop_uses.get(&dom_lh_idx).unwrap().borrow_mut(); + for ssa in uses.iter() { + parent_uses.insert(*ssa); + } + } + } + + let mut spill = SpillCache::new(&mut func.ssa_alloc, spill); + let mut spilled_phis = BitSet::new(); + + let mut ssa_state_in: Vec = Vec::new(); + let mut ssa_state_out: Vec = Vec::new(); + + for b_idx in 0..blocks.len() { + let bl = live.block_live(b_idx); + + let preds = blocks.pred_indices(b_idx).to_vec(); + let w = if preds.is_empty() { + // This is the start block so we start with nothing in + // registers. + LiveSet::new() + } else if preds.len() == 1 { + // If we only have one predecessor then it can't possibly be a + // loop header and we can just copy the predecessor's w. 
+ assert!(!blocks.is_loop_header(b_idx)); + assert!(preds[0] < b_idx); + let p_w = &ssa_state_out[preds[0]].w; + LiveSet::from_iter( + p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(), + ) + } else if blocks.is_loop_header(b_idx) { + let mut i_b: HashSet = + HashSet::from_iter(bl.iter_live_in().cloned()); + + if let Some(phi) = blocks[b_idx].phi_dsts() { + for (_, dst) in phi.dsts.iter() { + if let Dst::SSA(vec) = dst { + assert!(vec.comps() == 1); + let ssa = vec[0]; + if ssa.file() == file { + i_b.insert(ssa); + } + } + } + } + + let lu = loop_uses.get(&b_idx).unwrap().borrow(); + let mut w = LiveSet::new(); + + let mut some = BinaryHeap::new(); + for ssa in i_b.iter() { + if lu.contains(ssa) { + let next_use = bl.first_use(ssa).unwrap(); + some.push(Reverse(SSANextUse::new(*ssa, next_use))); + } + } + while w.count(file) < limit.into() { + let Some(entry) = some.pop() else { + break; + }; + w.insert(entry.0.ssa); + } + + // If we still have room, consider values which aren't used + // inside the loop. 
+ if w.count(file) < limit.into() { + for ssa in i_b.iter() { + debug_assert!(ssa.file() == file); + if !lu.contains(ssa) { + let next_use = bl.first_use(ssa).unwrap(); + some.push(Reverse(SSANextUse::new(*ssa, next_use))); + } + } + + while w.count(file) < limit.into() { + let Some(entry) = some.pop() else { + break; + }; + w.insert(entry.0.ssa); + } + } + + w + } else { + let phi_dst_map = &phi_dst_maps[b_idx]; + + struct SSAPredInfo { + num_preds: usize, + next_use: usize, + } + let mut live: HashMap = HashMap::new(); + + for p_idx in &preds { + let phi_src_map = &phi_src_maps[*p_idx]; + + for mut ssa in ssa_state_out[*p_idx].w.iter().cloned() { + if let Some(phi) = phi_dst_map.get_phi_idx(&ssa) { + ssa = *phi_src_map.get_src_ssa(phi); + } + + if let Some(next_use) = bl.first_use(&ssa) { + live.entry(ssa) + .and_modify(|e| e.num_preds += 1) + .or_insert_with(|| SSAPredInfo { + num_preds: 1, + next_use: next_use, + }); + } + } + } + + let mut w = LiveSet::new(); + let mut some = BinaryHeap::new(); + + for (ssa, info) in live.drain() { + if info.num_preds == preds.len() { + /* This one is in all the input sets */ + w.insert(ssa); + } else { + some.push(Reverse(SSANextUse::new(ssa, info.next_use))); + } + } + while w.count(file) < limit.into() { + let Some(entry) = some.pop() else { + break; + }; + let ssa = entry.0.ssa; + assert!(ssa.file() == file); + w.insert(ssa); + } + + w + }; + + let s = if preds.is_empty() { + HashSet::new() + } else if preds.len() == 1 { + let p_s = &ssa_state_out[preds[0]].s; + HashSet::from_iter( + p_s.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(), + ) + } else { + HashSet::from_iter( + bl.iter_live_in().filter(|ssa| !w.contains(ssa)).cloned(), + ) + }; + + let mut b = SSAState { w: w, s: s }; + + assert!(ssa_state_in.len() == b_idx); + ssa_state_in.push(b.clone()); + + let bb = &mut blocks[b_idx]; + + let mut instrs = Vec::new(); + for (ip, mut instr) in bb.instrs.drain(..).enumerate() { + match &mut instr.op { + Op::PhiDsts(phi) => 
{ + // For phis, anything that is not in W needs to be spilled + // by setting the destination to some spill value. + for (idx, dst) in phi.dsts.iter_mut() { + let vec = dst.as_ssa().unwrap(); + debug_assert!(vec.comps() == 1); + let ssa = &vec[0]; + + if ssa.file() == file && !b.w.contains(ssa) { + spilled_phis.insert((*idx).try_into().unwrap()); + b.s.insert(*ssa); + *dst = spill.get_spill(*ssa).into(); + } + } + } + Op::PhiSrcs(_) => { + // We handle phi sources later. For now, leave them be. + } + Op::ParCopy(pcopy) => { + let mut num_w_dsts = 0_u32; + for (dst, src) in pcopy.dsts_srcs.iter_mut() { + let dst_vec = dst.as_ssa().unwrap(); + debug_assert!(dst_vec.comps() == 1); + let dst_ssa = &dst_vec[0]; + + debug_assert!(src.src_mod.is_none()); + let src_vec = src.src_ref.as_ssa().unwrap(); + debug_assert!(src_vec.comps() == 1); + let src_ssa = &src_vec[0]; + + debug_assert!(dst_ssa.file() == src_ssa.file()); + if src_ssa.file() != file { + continue; + } + + // If it's not resident, rewrite to just move from one + // spill to another, assuming that copying in spill + // space is efficient + if b.w.contains(src_ssa) { + num_w_dsts += 1; + } else { + debug_assert!(b.s.contains(src_ssa)); + b.s.insert(*dst_ssa); + *src = spill.get_spill(*src_ssa).into(); + *dst = spill.get_spill(*dst_ssa).into(); + } + } + + // We can now assume that a source is in W if and only if + // the file matches. Remove all killed sources from W. 
+ for (_, src) in pcopy.dsts_srcs.iter() { + let src_ssa = &src.src_ref.as_ssa().unwrap()[0]; + if !bl.is_live_after_ip(src_ssa, ip) { + b.w.remove(src_ssa); + } + } + + let rel_limit = limit - b.w.count(file); + if num_w_dsts > rel_limit { + let count = num_w_dsts - rel_limit; + let count = count.try_into().unwrap(); + + let mut spills = SpillChooser::new(bl, ip, count); + for (dst, _) in pcopy.dsts_srcs.iter() { + let dst_ssa = &dst.as_ssa().unwrap()[0]; + spills.add_candidate(*dst_ssa); + } + + let spills: HashSet = + HashSet::from_iter(spills); + + for (dst, src) in pcopy.dsts_srcs.iter_mut() { + let dst_ssa = &dst.as_ssa().unwrap()[0]; + let src_ssa = &src.src_ref.as_ssa().unwrap()[0]; + if spills.contains(dst_ssa) { + if b.s.insert(*src_ssa) { + instrs.push(spill.spill(*src_ssa)); + } + b.s.insert(*dst_ssa); + *src = spill.get_spill(*src_ssa).into(); + *dst = spill.get_spill(*dst_ssa).into(); + } + } + } + + for (dst, _) in pcopy.dsts_srcs.iter() { + let dst_ssa = &dst.as_ssa().unwrap()[0]; + if dst_ssa.file() == file { + b.s.insert(*dst_ssa); + } + } + } + _ => { + // First compute fills even though those have to come + // after spills. 
+ let mut fills = Vec::new(); + instr.for_each_ssa_use(|ssa| { + if ssa.file() == file && !b.w.contains(ssa) { + debug_assert!(b.s.contains(ssa)); + fills.push(spill.fill(*ssa)); + b.w.insert(*ssa); + } + }); + + let rel_pressure = bl.get_instr_pressure(ip, &instr)[file]; + let abs_pressure = + b.w.count(file) + u32::from(rel_pressure); + + if abs_pressure > limit.into() { + let count = abs_pressure - u32::from(limit); + let count = count.try_into().unwrap(); + + let mut spills = SpillChooser::new(bl, ip, count); + for ssa in b.w.iter() { + spills.add_candidate(*ssa); + } + + for ssa in spills { + debug_assert!(ssa.file() == file); + b.w.remove(&ssa); + instrs.push(spill.spill(ssa)); + b.s.insert(ssa); + } + } + + instrs.append(&mut fills); + + instr.for_each_ssa_use(|ssa| { + if ssa.file() == file { + debug_assert!(b.w.contains(ssa)); + } + }); + + b.w.insert_instr_top_down(ip, &instr, bl); + } + } + + instrs.push(instr); + } + bb.instrs = instrs; + + assert!(ssa_state_out.len() == b_idx); + ssa_state_out.push(b); + } + + // Now that everything is spilled, we handle phi sources and connect the + // blocks by adding spills and fills as needed along edges. + for p_idx in 0..blocks.len() { + let succ = blocks.succ_indices(p_idx); + if succ.len() != 1 { + // We don't have any critical edges + for s_idx in succ { + debug_assert!(blocks.pred_indices(*s_idx).len() == 1); + } + continue; + } + let s_idx = succ[0]; + + // If blocks[p_idx] is the unique predecessor of blocks[s_idx] then the + // spill/fill sets for blocks[s_idx] are just those from blocks[p_idx], + // filtered for liveness and there is no phi source. There's nothing + // for us to do here.
+ if blocks.pred_indices(s_idx).len() == 1 { + continue; + } + + let pb = &mut blocks[p_idx]; + let p_out = &ssa_state_out[p_idx]; + let s_in = &ssa_state_in[s_idx]; + let phi_dst_map = &phi_dst_maps[s_idx]; + + let mut spills = Vec::new(); + let mut fills = Vec::new(); + + if let Some(phi) = pb.phi_srcs_mut() { + for (idx, src) in phi.srcs.iter_mut() { + debug_assert!(src.src_mod.is_none()); + let vec = src.src_ref.as_ssa().unwrap(); + debug_assert!(vec.comps() == 1); + let ssa = &vec[0]; + + if ssa.file() != file { + continue; + } + + if spilled_phis.get((*idx).try_into().unwrap()) { + if !p_out.s.contains(ssa) { + spills.push(*ssa); + } + *src = spill.get_spill(*ssa).into(); + } else { + if !p_out.w.contains(ssa) { + fills.push(*ssa); + } + } + } + } + + for ssa in s_in.s.iter() { + if p_out.w.contains(ssa) && !p_out.s.contains(ssa) { + spills.push(*ssa); + } + } + + for ssa in s_in.w.iter() { + if phi_dst_map.get_phi_idx(ssa).is_some() { + continue; + } + + if !p_out.w.contains(ssa) { + fills.push(*ssa); + } + } + + if spills.is_empty() && fills.is_empty() { + continue; + } + + // Sort to ensure stability of the algorithm + spills.sort_by_key(|ssa| ssa.idx()); + fills.sort_by_key(|ssa| ssa.idx()); + + let mut instrs = Vec::new(); + for ssa in spills { + instrs.push(spill.spill(ssa)); + } + for ssa in fills { + instrs.push(spill.fill(ssa)); + } + + // Insert spills and fills right after the phi (if any) + let mut ip = pb.instrs.len(); + while ip > 0 { + let instr = &pb.instrs[ip - 1]; + if !instr.is_branch() { + match instr.op { + Op::PhiSrcs(_) => (), + _ => break, + } + } + ip -= 1; + } + pb.instrs.splice(ip..ip, instrs.into_iter()); + } +} + +impl Function { + pub fn spill_values(&mut self, file: RegFile, limit: u32) { + match file { + RegFile::GPR => { + let spill = SpillGPR::new(); + spill_values(self, file, limit, spill); + } + RegFile::Pred => { + let spill = SpillPred::new(); + spill_values(self, file, limit, spill); + } + _ => panic!("Don't know how to 
spill {} registers", file), + } + + self.repair_ssa(); + self.opt_dce(); + + if DEBUG.print() { + eprintln!("NAK IR after spilling {}:\n{}", file, self); + } + } +}