mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-25 22:10:38 +02:00
nak: Phi coalescing via biased register coloring
Reduces code size by -29.08% on shaderdb + nvk-fossils-foss Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31498>
This commit is contained in:
parent
38f7ae5288
commit
537ada2308
1 changed files with 114 additions and 11 deletions
|
|
@ -4,6 +4,7 @@
|
|||
use crate::api::{GetDebugFlags, DEBUG};
|
||||
use crate::ir::*;
|
||||
use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
|
||||
use crate::union_find::UnionFind;
|
||||
|
||||
use compiler::bitset::BitSet;
|
||||
use std::cmp::{max, Ordering};
|
||||
|
|
@ -149,6 +150,78 @@ impl SSAUseMap {
|
|||
}
|
||||
}
|
||||
|
||||
/// Tracks the most recent register assigned to a given phi web
|
||||
///
|
||||
/// During register assignment, we then try to assign this register
|
||||
/// to the next SSAValue in the same web.
|
||||
///
|
||||
/// This heuristic is inspired by the "Aggressive pre-coalescing" described in
|
||||
/// section 4 of Colombet et al 2011.
|
||||
///
|
||||
/// Q. Colombet, B. Boissinot, P. Brisk, S. Hack and F. Rastello,
|
||||
/// "Graph-coloring and treescan register allocation using repairing," 2011
|
||||
/// Proceedings of the 14th International Conference on Compilers,
|
||||
/// Architectures and Synthesis for Embedded Systems (CASES), Taipei,
|
||||
/// Taiwan, 2011, pp. 45-54, doi: 10.1145/2038698.2038708.
|
||||
struct PhiWebs {
|
||||
uf: UnionFind<SSAValue>,
|
||||
assignments: HashMap<SSAValue, u32>,
|
||||
}
|
||||
|
||||
impl PhiWebs {
|
||||
pub fn new(f: &Function) -> Self {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// Populate uf with phi equivalence classes
|
||||
//
|
||||
// Note that we intentionally don't pay attention to move instructions
|
||||
// below - the assumption is that any move instructions at this point
|
||||
// were inserted by cssa-conversion and will hurt the coalescing
|
||||
for b_idx in 0..f.blocks.len() {
|
||||
let Some(phi_dsts) = f.blocks[b_idx].phi_dsts() else {
|
||||
continue;
|
||||
};
|
||||
let dsts: HashMap<u32, &SSARef> = phi_dsts
|
||||
.dsts
|
||||
.iter()
|
||||
.map(|(idx, dst)| {
|
||||
let ssa_ref = dst.as_ssa().expect("Expected ssa form");
|
||||
(*idx, ssa_ref)
|
||||
})
|
||||
.collect();
|
||||
|
||||
for pred_idx in f.blocks.pred_indices(b_idx) {
|
||||
let phi_srcs =
|
||||
f.blocks[*pred_idx].phi_srcs().expect("Missing phi_srcs");
|
||||
for (src_idx, src) in phi_srcs.srcs.iter() {
|
||||
let a = src.as_ssa().expect("Expected ssa form");
|
||||
let b = dsts[src_idx];
|
||||
|
||||
assert_eq!(a.comps(), 1);
|
||||
assert_eq!(b.comps(), 1);
|
||||
|
||||
uf.union(a[0], b[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PhiWebs {
|
||||
uf,
|
||||
assignments: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&mut self, ssa: SSAValue) -> Option<u32> {
|
||||
let phi_web_id = self.uf.find(ssa);
|
||||
self.assignments.get(&phi_web_id).copied()
|
||||
}
|
||||
|
||||
pub fn set(&mut self, ssa: SSAValue, reg: u32) {
|
||||
let phi_web_id = self.uf.find(ssa);
|
||||
self.assignments.insert(phi_web_id, reg);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||
enum LiveRef {
|
||||
SSA(SSAValue),
|
||||
|
|
@ -338,8 +411,18 @@ impl RegAllocator {
|
|||
&mut self,
|
||||
ip: usize,
|
||||
sum: &SSAUseMap,
|
||||
phi_webs: &mut PhiWebs,
|
||||
ssa: SSAValue,
|
||||
) -> u32 {
|
||||
// Bias register assignment using the phi coalescing
|
||||
if let Some(reg) = phi_webs.get(ssa) {
|
||||
if !self.reg_is_used(reg) {
|
||||
self.assign_reg(ssa, reg);
|
||||
return reg;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, use SSAUseMap heuristics
|
||||
if let Some(u) = sum.find_vec_use_after(ssa, ip) {
|
||||
match u {
|
||||
SSAUse::FixedReg(reg) => {
|
||||
|
|
@ -657,13 +740,14 @@ fn instr_alloc_scalar_dsts_file(
|
|||
instr: &mut Instr,
|
||||
ip: usize,
|
||||
sum: &SSAUseMap,
|
||||
phi_webs: &mut PhiWebs,
|
||||
ra: &mut RegAllocator,
|
||||
) {
|
||||
for dst in instr.dsts_mut() {
|
||||
if let Dst::SSA(ssa) = dst {
|
||||
if ssa.file().unwrap() == ra.file() {
|
||||
assert!(ssa.comps() == 1);
|
||||
let reg = ra.alloc_scalar(ip, sum, ssa[0]);
|
||||
let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa[0]);
|
||||
*dst = RegRef::new(ra.file(), reg, 1).into();
|
||||
}
|
||||
}
|
||||
|
|
@ -674,6 +758,7 @@ fn instr_assign_regs_file(
|
|||
instr: &mut Instr,
|
||||
ip: usize,
|
||||
sum: &SSAUseMap,
|
||||
phi_webs: &mut PhiWebs,
|
||||
killed: &KillSet,
|
||||
pcopy: &mut OpParCopy,
|
||||
ra: &mut RegAllocator,
|
||||
|
|
@ -707,7 +792,7 @@ fn instr_assign_regs_file(
|
|||
instr_remap_srcs_file(instr, &mut vra);
|
||||
vra.free_killed(killed);
|
||||
vra.finish(pcopy);
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -786,7 +871,7 @@ fn instr_assign_regs_file(
|
|||
|
||||
vra.finish(pcopy);
|
||||
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
|
||||
} else if could_trivially_allocate {
|
||||
let mut vra = VecRegAllocator::new(ra);
|
||||
for vec_dst in vec_dsts {
|
||||
|
|
@ -799,7 +884,7 @@ fn instr_assign_regs_file(
|
|||
instr_remap_srcs_file(instr, &mut vra);
|
||||
vra.free_killed(killed);
|
||||
vra.finish(pcopy);
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
|
||||
} else {
|
||||
let mut vra = VecRegAllocator::new(ra);
|
||||
instr_remap_srcs_file(instr, &mut vra);
|
||||
|
|
@ -817,7 +902,7 @@ fn instr_assign_regs_file(
|
|||
vra.free_killed(killed);
|
||||
vra.finish(pcopy);
|
||||
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
|
||||
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -864,10 +949,11 @@ impl AssignRegsBlock {
|
|||
&mut self,
|
||||
ip: usize,
|
||||
sum: &SSAUseMap,
|
||||
phi_webs: &mut PhiWebs,
|
||||
ssa: SSAValue,
|
||||
) -> RegRef {
|
||||
let ra = &mut self.ra[ssa.file()];
|
||||
let reg = ra.alloc_scalar(ip, sum, ssa);
|
||||
let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa);
|
||||
RegRef::new(ssa.file(), reg, 1)
|
||||
}
|
||||
|
||||
|
|
@ -915,6 +1001,7 @@ impl AssignRegsBlock {
|
|||
mut instr: Box<Instr>,
|
||||
ip: usize,
|
||||
sum: &SSAUseMap,
|
||||
phi_webs: &mut PhiWebs,
|
||||
srcs_killed: &KillSet,
|
||||
dsts_killed: &KillSet,
|
||||
pcopy: &mut OpParCopy,
|
||||
|
|
@ -923,7 +1010,7 @@ impl AssignRegsBlock {
|
|||
Op::Undef(undef) => {
|
||||
if let Dst::SSA(ssa) = undef.dst {
|
||||
assert!(ssa.comps() == 1);
|
||||
self.alloc_scalar(ip, sum, ssa[0]);
|
||||
self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
|
||||
}
|
||||
assert!(srcs_killed.is_empty());
|
||||
self.ra.free_killed(dsts_killed);
|
||||
|
|
@ -949,7 +1036,7 @@ impl AssignRegsBlock {
|
|||
for (id, dst) in phi.dsts.iter() {
|
||||
if let Dst::SSA(ssa) = dst {
|
||||
assert!(ssa.comps() == 1);
|
||||
let reg = self.alloc_scalar(ip, sum, ssa[0]);
|
||||
let reg = self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
|
||||
self.live_in.push(LiveValue {
|
||||
live_ref: LiveRef::Phi(*id),
|
||||
reg_ref: reg,
|
||||
|
|
@ -1025,7 +1112,9 @@ impl AssignRegsBlock {
|
|||
if self.try_coalesce(*dst_ssa, ©.src) {
|
||||
del_copy = true;
|
||||
} else {
|
||||
copy.dst = self.alloc_scalar(ip, sum, *dst_ssa).into();
|
||||
copy.dst = self
|
||||
.alloc_scalar(ip, sum, phi_webs, *dst_ssa)
|
||||
.into();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1117,7 +1206,9 @@ impl AssignRegsBlock {
|
|||
for (dst, _) in pcopy.dsts_srcs.iter_mut() {
|
||||
if let Dst::SSA(dst_vec) = dst {
|
||||
debug_assert!(dst_vec.comps() == 1);
|
||||
*dst = self.alloc_scalar(ip, sum, dst_vec[0]).into();
|
||||
*dst = self
|
||||
.alloc_scalar(ip, sum, phi_webs, dst_vec[0])
|
||||
.into();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1160,6 +1251,7 @@ impl AssignRegsBlock {
|
|||
&mut instr,
|
||||
ip,
|
||||
sum,
|
||||
phi_webs,
|
||||
srcs_killed,
|
||||
pcopy,
|
||||
file,
|
||||
|
|
@ -1176,6 +1268,7 @@ impl AssignRegsBlock {
|
|||
b: &mut BasicBlock,
|
||||
bl: &BL,
|
||||
pred_ra: Option<&PerRegFile<RegAllocator>>,
|
||||
phi_webs: &mut PhiWebs,
|
||||
) {
|
||||
// Populate live in from the register file we're handed. We'll add more
|
||||
// live in when we process the OpPhiDst, if any.
|
||||
|
|
@ -1236,6 +1329,7 @@ impl AssignRegsBlock {
|
|||
instr,
|
||||
ip,
|
||||
&sum,
|
||||
phi_webs,
|
||||
&srcs_killed,
|
||||
&dsts_killed,
|
||||
&mut pcopy,
|
||||
|
|
@ -1262,6 +1356,13 @@ impl AssignRegsBlock {
|
|||
}
|
||||
}
|
||||
|
||||
// Update phi_webs with the registers assigned in this block
|
||||
for ra in self.ra.values() {
|
||||
for (ssa, reg) in &ra.ssa_reg {
|
||||
phi_webs.set(*ssa, *reg);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort live-in to maintain determinism
|
||||
self.live_in.sort();
|
||||
|
||||
|
|
@ -1367,6 +1468,8 @@ impl Shader<'_> {
|
|||
}
|
||||
});
|
||||
|
||||
let mut phi_webs = PhiWebs::new(f);
|
||||
|
||||
let mut blocks: Vec<AssignRegsBlock> = Vec::new();
|
||||
for b_idx in 0..f.blocks.len() {
|
||||
let pred = f.blocks.pred_indices(b_idx);
|
||||
|
|
@ -1380,7 +1483,7 @@ impl Shader<'_> {
|
|||
let bl = live.block_live(b_idx);
|
||||
|
||||
let mut arb = AssignRegsBlock::new(&limit, tmp_gprs);
|
||||
arb.first_pass(&mut f.blocks[b_idx], bl, pred_ra);
|
||||
arb.first_pass(&mut f.blocks[b_idx], bl, pred_ra, &mut phi_webs);
|
||||
|
||||
assert!(blocks.len() == b_idx);
|
||||
blocks.push(arb);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue