diff --git a/src/nouveau/compiler/nak/assign_regs.rs b/src/nouveau/compiler/nak/assign_regs.rs index 42d5c99ebbc..d5f8f3f20ce 100644 --- a/src/nouveau/compiler/nak/assign_regs.rs +++ b/src/nouveau/compiler/nak/assign_regs.rs @@ -80,6 +80,7 @@ fn src_set_reg(src: &mut Src, reg: RegRef) { enum SSAUse { FixedReg(u32), Vec(SSARef), + Phi { ssa: Vec, comp: u32 }, } struct SSAUseMap { @@ -103,6 +104,29 @@ impl SSAUseMap { } } + fn add_vec_use_phi( + &mut self, + ip: usize, + ssa: &[SSAValue], + vec: &[SSAValue], + c: u8, + ) { + // TODO: It might be interesting to properly align vec2 used in vec4, + // but that doesn't seem to be a problem for now. + if vec.len() == 1 || ssa.len() != 1 { + return; + } + + let v = self.ssa_map.entry(ssa[0]).or_default(); + v.push(( + ip, + SSAUse::Phi { + ssa: vec.to_vec(), + comp: c.into(), + }, + )); + } + fn find_vec_use_after(&self, ssa: SSAValue, ip: usize) -> Option<&SSAUse> { if let Some(v) = self.ssa_map.get(&ssa) { let p = v.partition_point(|(uip, _)| *uip <= ip); @@ -117,7 +141,7 @@ impl SSAUseMap { } } - pub fn add_block(&mut self, b: &BasicBlock) { + pub fn add_block(&mut self, b: &BasicBlock, phi_webs: &mut PhiWebs) { for (ip, instr) in b.instrs.iter().enumerate() { match &instr.op { Op::RegOut(op) => { @@ -132,20 +156,30 @@ impl SSAUseMap { _ => { // We don't care about predicates because they're scalar for src in instr.srcs() { - if let Some(ssa) = src_ssa_ref(src) { - self.add_vec_use(ip, ssa); - } + let Some(ssa) = src_ssa_ref(src) else { + continue; + }; + self.add_vec_use(ip, ssa); + + let phi_web_id = phi_webs.uf.find(ssa[0]); + let Some((phi_use, c)) = + phi_webs.vec_usages.get(&phi_web_id) + else { + continue; + }; + + self.add_vec_use_phi(ip, ssa, phi_use, *c); } } } } } - pub fn for_block(b: &BasicBlock) -> SSAUseMap { + pub fn for_block(b: &BasicBlock, phi_webs: &mut PhiWebs) -> SSAUseMap { let mut am = SSAUseMap { ssa_map: Default::default(), }; - am.add_block(b); + am.add_block(b, phi_webs); am } } @@ -166,12 +200,17 @@ impl SSAUseMap { struct PhiWebs { uf: UnionFind, assignments: FxHashMap, + /// phi_web_id -> [phi_web_id, ...] + component usages + vec_usages: FxHashMap, u8)>, } impl PhiWebs { pub fn new(f: &Function) -> Self { let mut uf = UnionFind::new(); + let mut phis_in: FxHashMap, u8)> = + Default::default(); + // Populate uf with phi equivalence classes // // Note that we intentionally don't pay attention to move instructions @@ -205,9 +244,33 @@ impl PhiWebs { } } + for block in f.blocks.iter() { + for instr in &block.instrs { + for vec in instr + .dsts() + .iter() + .filter_map(Dst::as_ssa) + .chain(instr.srcs().iter().filter_map(Src::as_ssa)) + { + if vec.comps() == 1 { + continue; + } + + let vec: Vec<_> = + vec.iter().map(|ssa| uf.find(*ssa)).collect(); + for (c, phi_web_id) in vec.iter().enumerate() { + phis_in + .entry(*phi_web_id) + .or_insert((vec.clone(), c as u8)); + } + } + } + } + PhiWebs { uf, assignments: Default::default(), + vec_usages: phis_in, } } @@ -426,6 +489,25 @@ impl RegAllocator { return *reg; } } + SSAUse::Phi { ssa: vec, comp } => { + // Only do this if we haven't found the value within the phi_webs + if phi_webs.get(ssa).is_none() { + let align = vec.len().next_power_of_two(); + if let Some(reg) = self.try_find_unused_reg_range( + 0, + vec.len() as u8, + align as u8, + 0, + ) { + self.assign_reg(ssa, reg + *comp); + + for (idx, ssa) in vec.iter().enumerate() { + phi_webs.set(*ssa, reg + idx as u32); + } + return reg + *comp; + } + } + } SSAUse::Vec(vec) => { let mut comp = u8::MAX; for c in 0..vec.comps() { @@ -1303,7 +1385,7 @@ impl AssignRegsBlock { } } - let sum = SSAUseMap::for_block(b); + let sum = SSAUseMap::for_block(b, phi_webs); let mut instrs = Vec::new(); let mut srcs_killed = KillSet::new();