nak: coalesce phi vectors

This reduces the number of mov instructions RA leaves behind to fulfill
vec alignments.

Totals:
CodeSize: 936472720 -> 935988384 (-0.05%); split: -0.06%, +0.01%
Static cycle count: 224173176 -> 224196648 (+0.01%); split: -0.02%, +0.03%

Totals from 6794 (7.75% of 87622) affected shaders:
CodeSize: 217320112 -> 216835776 (-0.22%); split: -0.24%, +0.02%
Static cycle count: 52683340 -> 52706812 (+0.04%); split: -0.09%, +0.13%
This commit is contained in:
Karol Herbst 2025-05-28 16:14:57 +02:00
parent d06aff2243
commit acabca78b4

View file

@ -80,6 +80,7 @@ fn src_set_reg(src: &mut Src, reg: RegRef) {
/// A recorded use of an SSA value, stored per instruction index in
/// `SSAUseMap::ssa_map`. The register allocator consults it when picking a
/// register for the value.
enum SSAUse {
/// NOTE(review): presumably the value is wanted in this specific register
/// index; the producer of this variant is not visible here -- confirm.
FixedReg(u32),
/// The value is used as one component of this vector `SSARef`.
/// NOTE(review): presumably recorded by `add_vec_use` -- confirm.
Vec(SSARef),
/// The value belongs to a phi web that is itself used as a vector component.
///
/// `ssa` holds one entry per component of that vector (as recorded by
/// `add_vec_use_phi`), and `comp` is the component index this value occupies.
/// The allocator tries to reserve an aligned range of `ssa.len()` registers
/// and places the value at offset `comp` within it.
Phi { ssa: Vec<SSAValue>, comp: u32 },
}
struct SSAUseMap {
@ -103,6 +104,29 @@ impl SSAUseMap {
}
}
/// Records, for the scalar in `ssa`, a `SSAUse::Phi` use at instruction `ip`.
///
/// `vec` is stored as the vector of the recorded use and `c` as the component
/// index within it. Nothing is recorded unless `ssa` has exactly one element
/// and `vec` does not have exactly one element.
fn add_vec_use_phi(
    &mut self,
    ip: usize,
    ssa: &[SSAValue],
    vec: &[SSAValue],
    c: u8,
) {
    // TODO: It might be interesting to properly align vec2 used in vec4,
    // but that doesn't seem to be a problem for now.
    let [scalar] = ssa else {
        return;
    };
    if vec.len() == 1 {
        return;
    }

    let phi_use = SSAUse::Phi {
        ssa: vec.to_vec(),
        comp: u32::from(c),
    };
    // Appended to the per-value list keyed by the scalar itself.
    self.ssa_map.entry(*scalar).or_default().push((ip, phi_use));
}
fn find_vec_use_after(&self, ssa: SSAValue, ip: usize) -> Option<&SSAUse> {
if let Some(v) = self.ssa_map.get(&ssa) {
let p = v.partition_point(|(uip, _)| *uip <= ip);
@ -117,7 +141,7 @@ impl SSAUseMap {
}
}
pub fn add_block(&mut self, b: &BasicBlock) {
pub fn add_block(&mut self, b: &BasicBlock, phi_webs: &mut PhiWebs) {
for (ip, instr) in b.instrs.iter().enumerate() {
match &instr.op {
Op::RegOut(op) => {
@ -132,20 +156,30 @@ impl SSAUseMap {
_ => {
// We don't care about predicates because they're scalar
for src in instr.srcs() {
if let Some(ssa) = src_ssa_ref(src) {
self.add_vec_use(ip, ssa);
}
let Some(ssa) = src_ssa_ref(src) else {
continue;
};
self.add_vec_use(ip, ssa);
let phi_web_id = phi_webs.uf.find(ssa[0]);
let Some((phi_use, c)) =
phi_webs.vec_usages.get(&phi_web_id)
else {
continue;
};
self.add_vec_use_phi(ip, ssa, phi_use, *c);
}
}
}
}
}
pub fn for_block(b: &BasicBlock) -> SSAUseMap {
pub fn for_block(b: &BasicBlock, phi_webs: &mut PhiWebs) -> SSAUseMap {
let mut am = SSAUseMap {
ssa_map: Default::default(),
};
am.add_block(b);
am.add_block(b, phi_webs);
am
}
}
@ -166,12 +200,17 @@ impl SSAUseMap {
/// Bookkeeping for phi webs: the equivalence classes of SSA values plus
/// per-web data keyed by the class representative.
struct PhiWebs {
/// Union-find populated with the phi equivalence classes; `find` on an SSA
/// value yields the id (representative) of its phi web.
uf: UnionFind<SSAValue, FxBuildHasher>,
/// Starts out empty in `new`.
/// NOTE(review): presumably maps a phi web to the register chosen for it
/// (filled via `set`, queried via `get`) -- confirm.
assignments: FxHashMap<SSAValue, u32>,
/// phi_web_id -> ([phi_web_id, ...], component)
///
/// For a phi web that appears as a component of a multi-component vector
/// source or destination: that vector with every component replaced by its
/// phi web id, together with the component index of this web inside it.
/// Only the first such vector encountered for a web is kept.
vec_usages: FxHashMap<SSAValue, (Vec<SSAValue>, u8)>,
}
impl PhiWebs {
pub fn new(f: &Function) -> Self {
let mut uf = UnionFind::new();
let mut phis_in: FxHashMap<SSAValue, (Vec<SSAValue>, u8)> =
Default::default();
// Populate uf with phi equivalence classes
//
// Note that we intentionally don't pay attention to move instructions
@ -205,9 +244,33 @@ impl PhiWebs {
}
}
for block in f.blocks.iter() {
for instr in &block.instrs {
for vec in instr
.dsts()
.iter()
.filter_map(Dst::as_ssa)
.chain(instr.srcs().iter().filter_map(Src::as_ssa))
{
if vec.comps() == 1 {
continue;
}
let vec: Vec<_> =
vec.iter().map(|ssa| uf.find(*ssa)).collect();
for (c, phi_web_id) in vec.iter().enumerate() {
phis_in
.entry(*phi_web_id)
.or_insert((vec.clone(), c as u8));
}
}
}
}
PhiWebs {
uf,
assignments: Default::default(),
vec_usages: phis_in,
}
}
@ -426,6 +489,25 @@ impl RegAllocator {
return *reg;
}
}
SSAUse::Phi { ssa: vec, comp } => {
// Only do this if we haven't found the value within the phi_webs
if phi_webs.get(ssa).is_none() {
let align = vec.len().next_power_of_two();
if let Some(reg) = self.try_find_unused_reg_range(
0,
vec.len() as u8,
align as u8,
0,
) {
self.assign_reg(ssa, reg + *comp);
for (idx, ssa) in vec.iter().enumerate() {
phi_webs.set(*ssa, reg + idx as u32);
}
return reg + *comp;
}
}
}
SSAUse::Vec(vec) => {
let mut comp = u8::MAX;
for c in 0..vec.comps() {
@ -1303,7 +1385,7 @@ impl AssignRegsBlock {
}
}
let sum = SSAUseMap::for_block(b);
let sum = SSAUseMap::for_block(b, phi_webs);
let mut instrs = Vec::new();
let mut srcs_killed = KillSet::new();