diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs index c57f81aae47..423c210d7d1 100644 --- a/src/nouveau/compiler/nak.rs +++ b/src/nouveau/compiler/nak.rs @@ -445,7 +445,6 @@ pub extern "C" fn nak_compile_shader( } s.assign_regs(); - //s.assign_regs_trivial(); if DEBUG.print() { println!("NAK IR:\n{}", &s); } diff --git a/src/nouveau/compiler/nak_assign_regs.rs b/src/nouveau/compiler/nak_assign_regs.rs index 2a0066eaaa5..3fe2ecf5b78 100644 --- a/src/nouveau/compiler/nak_assign_regs.rs +++ b/src/nouveau/compiler/nak_assign_regs.rs @@ -41,36 +41,24 @@ impl KillSet { self.set.contains(ssa) } + pub fn contains_all(&self, slice: &[SSAValue]) -> bool { + for ssa in slice { + if !self.contains(ssa) { + return false; + } + } + true + } + pub fn iter(&self) -> std::slice::Iter<'_, SSAValue> { self.vec.iter() } } -#[derive(Clone, Copy, Eq, Hash, PartialEq)] -struct PhiComp { - v: SSAValue, -} - -impl PhiComp { - pub fn new(idx: u32, comp: u8) -> PhiComp { - PhiComp { - v: SSAValue::new(RegFile::GPR, idx, comp + 1), - } - } - - pub fn idx(&self) -> u32 { - self.v.idx() - } - - pub fn comp(&self) -> u8 { - self.v.comps() - 1 - } -} - #[derive(Clone, Copy, Eq, Hash, PartialEq)] enum LiveRef { - SSA(SSAComp), - Phi(PhiComp), + SSA(SSAValue), + Phi(u32), } #[derive(Clone, Copy, Eq, Hash, PartialEq)] @@ -107,8 +95,8 @@ struct RegFileAllocation { max_reg: u8, used: BitSet, pinned: BitSet, - reg_ssa: Vec, - ssa_reg: HashMap, + reg_ssa: Vec, + ssa_reg: HashMap, } impl RegFileAllocation { @@ -141,11 +129,11 @@ impl RegFileAllocation { } } - pub fn get_reg_comp(&self, ssa: SSAComp) -> u8 { + pub fn get_reg(&self, ssa: SSAValue) -> u8 { *self.ssa_reg.get(&ssa).unwrap() } - pub fn get_ssa_comp(&self, reg: u8) -> Option { + pub fn get_ssa(&self, reg: u8) -> Option { if self.used.get(reg.into()) { Some(self.reg_ssa[usize::from(reg)]) } else { @@ -153,12 +141,12 @@ impl RegFileAllocation { } } - pub fn try_get_reg(&self, ssa: SSAValue) -> Option { - let align = ssa.comps().next_power_of_two(); - let reg = self.get_reg_comp(ssa.comp(0)); + pub fn try_get_vec_reg(&self, vec: SSARef) -> Option { + let align = vec.comps().next_power_of_two(); + let reg = self.get_reg(vec[0]); if reg % align == 0 { - for i in 1..ssa.comps() { - if self.get_reg_comp(ssa.comp(i)) != reg + i { + for i in 1..vec.comps() { + if self.get_reg(vec[usize::from(i)]) != reg + i { return None; } } @@ -168,7 +156,7 @@ impl RegFileAllocation { } } - pub fn free_ssa_comp(&mut self, ssa: SSAComp) -> u8 { + pub fn free_ssa(&mut self, ssa: SSAValue) -> u8 { assert!(ssa.file() == self.file); let reg = self.ssa_reg.remove(&ssa).unwrap(); assert!(self.used.get(reg.into())); @@ -176,9 +164,9 @@ impl RegFileAllocation { reg } - pub fn free_ssa(&mut self, ssa: SSAValue) { - for i in 0..ssa.comps() { - self.free_ssa_comp(ssa.comp(i)); + pub fn free_ssa_ref(&mut self, ssa_ref: SSARef) { + for ssa in ssa_ref.iter() { + self.free_ssa(*ssa); } } @@ -190,14 +178,13 @@ impl RegFileAllocation { } } - pub fn assign_reg_comp(&mut self, ssa: SSAComp, reg: u8) -> RegRef { + pub fn assign_reg(&mut self, ssa: SSAValue, reg: u8) -> RegRef { assert!(ssa.file() == self.file); assert!(reg <= self.max_reg); assert!(!self.used.get(reg.into())); if usize::from(reg) >= self.reg_ssa.len() { - self.reg_ssa - .resize(usize::from(reg) + 1, SSAComp::new(RegFile::GPR, 0, 0)); + self.reg_ssa.resize(usize::from(reg) + 1, SSAValue::NONE); } self.reg_ssa[usize::from(reg)] = ssa; self.ssa_reg.insert(ssa, reg); @@ -207,14 +194,18 @@ impl RegFileAllocation { 
RegRef::new(self.file, reg, 1) } - pub fn assign_reg(&mut self, ssa: SSAValue, reg: u8) -> RegRef { + pub fn assign_vec_reg(&mut self, ssa: SSARef, reg: u8) -> RegRef { for i in 0..ssa.comps() { - self.assign_reg_comp(ssa.comp(i), reg + i); + self.assign_reg(ssa[usize::from(i)], reg + i); } RegRef::new(self.file, reg, ssa.comps()) } - pub fn try_assign_reg(&mut self, ssa: SSAValue, reg: u8) -> Option { + pub fn try_assign_vec_reg( + &mut self, + ssa: SSARef, + reg: u8, + ) -> Option { if ssa.file() != self.file() { return None; } @@ -226,10 +217,14 @@ impl RegFileAllocation { return None; } } - Some(self.assign_reg(ssa, reg)) + Some(self.assign_vec_reg(ssa, reg)) } - pub fn try_find_unused_reg(&self, start_reg: u8, comps: u8) -> Option { + pub fn try_find_unused_reg_range( + &self, + start_reg: u8, + comps: u8, + ) -> Option { assert!(comps > 0); let comps_mask = u32::MAX >> (32 - comps); let align = comps.next_power_of_two(); @@ -271,6 +266,7 @@ impl RegFileAllocation { } if let Ok(reg) = u8::try_from(self.used.words().len() * 32) { + let reg = max(start_reg, reg); if self.is_reg_in_bounds(reg, comps) { Some(reg) } else { @@ -281,36 +277,39 @@ impl RegFileAllocation { } } - fn get_reg_near_reg(&self, reg: u8, comps: u8) -> u8 { + fn try_find_unpinned_reg_range( + &self, + start_reg: u8, + comps: u8, + ) -> Option { let align = comps.next_power_of_two(); - /* Pick something properly aligned near component 0 */ - let mut reg = reg & (align - 1); - if !self.is_reg_in_bounds(reg, comps) { - reg -= align; - } - reg - } - - pub fn get_reg_near_ssa(&self, ssa: SSAValue) -> u8 { - /* Get something near component 0 */ - self.get_reg_near_reg(self.get_reg_comp(ssa.comp(0)), ssa.comps()) - } - - pub fn get_any_reg(&self, comps: u8) -> u8 { - let mut pick_comps = comps; - while pick_comps > 0 { - if let Some(reg) = self.try_find_unused_reg(0, pick_comps) { - return self.get_reg_near_reg(reg, comps); + let mut reg = start_reg.next_multiple_of(align); + while self.is_reg_in_bounds(reg, comps) { + let mut is_pinned = false; + for i in 0..comps { + if self.pinned.get((reg + i).into()) { + is_pinned = true; + break; + } } - pick_comps = pick_comps >> 1; + if !is_pinned { + return Some(reg); + } + reg += align; } - panic!("Failed to find any free registers"); + + None } - pub fn get_scalar(&mut self, ssa: SSAComp) -> RegRef { + pub fn try_find_unpinned_reg_near_ssa(&self, ssa: SSARef) -> Option { + /* Get something near component 0 */ + self.try_find_unpinned_reg_range(self.get_reg(ssa[0]), ssa.comps()) + } + + pub fn get_scalar(&mut self, ssa: SSAValue) -> RegRef { assert!(ssa.file() == self.file); - let reg = self.get_reg_comp(ssa); + let reg = self.get_reg(ssa); self.pinned.insert(reg.into()); RegRef::new(self.file, reg, 1) } @@ -318,54 +317,79 @@ impl RegFileAllocation { pub fn move_to_reg( &mut self, pcopy: &mut OpParCopy, - ssa: SSAValue, + ssa: SSARef, reg: u8, ) -> RegRef { for c in 0..ssa.comps() { - let old_reg = self.get_reg_comp(ssa.comp(c)); + let old_reg = self.get_reg(ssa[usize::from(c)]); if old_reg == reg + c { continue; } - self.free_ssa_comp(ssa.comp(c)); + self.free_ssa(ssa[usize::from(c)]); /* If something already exists in the destination, swap it to the * source. 
*/ - if let Some(evicted) = self.get_ssa_comp(reg + c) { - self.free_ssa_comp(evicted); + if let Some(evicted) = self.get_ssa(reg + c) { + self.free_ssa(evicted); pcopy.srcs.push(RegRef::new(self.file, reg + c, 1).into()); pcopy.dsts.push(RegRef::new(self.file, old_reg, 1).into()); - self.assign_reg_comp(evicted, old_reg); + self.assign_reg(evicted, old_reg); } pcopy.srcs.push(RegRef::new(self.file, old_reg, 1).into()); pcopy.dsts.push(RegRef::new(self.file, reg + c, 1).into()); - self.assign_reg_comp(ssa.comp(c), reg + c); + self.assign_reg(ssa[usize::from(c)], reg + c); } RegRef::new(self.file, reg, ssa.comps()) } - pub fn get_vector( - &mut self, - pcopy: &mut OpParCopy, - ssa: SSAValue, - ) -> RegRef { - let reg = if let Some(reg) = self.try_get_reg(ssa) { - reg - } else if let Some(reg) = self.try_find_unused_reg(0, ssa.comps()) { - reg - } else { - self.get_reg_near_ssa(ssa) - }; + pub fn get_vector(&mut self, pcopy: &mut OpParCopy, ssa: SSARef) -> RegRef { + let reg = self + .try_get_vec_reg(ssa) + .or_else(|| self.try_find_unused_reg_range(0, ssa.comps())) + .or_else(|| self.try_find_unpinned_reg_near_ssa(ssa)) + .or_else(|| self.try_find_unpinned_reg_range(0, ssa.comps())) + .expect("Failed to find an unpinned register range"); + for c in 0..ssa.comps() { + self.pinned.insert((reg + c).into()); + } self.move_to_reg(pcopy, ssa, reg) } - pub fn alloc_scalar(&mut self, ssa: SSAComp) -> RegRef { - let reg = self.try_find_unused_reg(0, 1).unwrap(); - self.assign_reg_comp(ssa, reg) + pub fn alloc_scalar(&mut self, ssa: SSAValue) -> RegRef { + let reg = self.try_find_unused_reg_range(0, 1).unwrap(); + self.assign_reg(ssa, reg) + } + + pub fn alloc_vector( + &mut self, + pcopy: &mut OpParCopy, + ssa: SSARef, + ) -> RegRef { + let reg = self + .try_find_unused_reg_range(0, ssa.comps()) + .or_else(|| self.try_find_unpinned_reg_range(0, ssa.comps())) + .expect("Failed to find an unpinned register range"); + + for c in 0..ssa.comps() { + self.pinned.insert((reg + c).into()); + } + + for c in 0..ssa.comps() { + if let Some(evicted) = self.get_ssa(reg + c) { + self.free_ssa(evicted); + let new_reg = self.try_find_unused_reg_range(0, 1).unwrap(); + pcopy.srcs.push(RegRef::new(self.file, reg + c, 1).into()); + pcopy.dsts.push(RegRef::new(self.file, new_reg, 1).into()); + self.assign_reg(evicted, new_reg); + } + } + + self.assign_vec_reg(ssa, reg) } } @@ -376,7 +400,7 @@ fn instr_remap_srcs_file( ) { if let Pred::SSA(pred) = instr.pred { if pred.file() == ra.file() { - instr.pred = ra.get_scalar(pred.as_comp()).into(); + instr.pred = ra.get_scalar(pred).into(); } } @@ -392,8 +416,9 @@ fn instr_remap_srcs_file( fn instr_alloc_scalar_dsts_file(instr: &mut Instr, ra: &mut RegFileAllocation) { for dst in instr.dsts_mut() { if let Dst::SSA(ssa) = dst { + assert!(ssa.comps() == 1); if ssa.file() == ra.file() { - *dst = ra.alloc_scalar(ssa.as_comp()).into(); + *dst = ra.alloc_scalar(ssa[0]).into(); } } } @@ -408,7 +433,7 @@ fn instr_assign_regs_file( struct VecDst { dst_idx: usize, comps: u8, - killed: Option, + killed: Option, reg: u8, } @@ -446,12 +471,25 @@ fn instr_assign_regs_file( */ assert!(!ra.file().is_predicate()); + let mut avail = killed.set.clone(); let mut killed_vecs = Vec::new(); - let mut killed_vec_comps = 0; - for ssa in killed.iter() { - if ssa.file() == ra.file() && ssa.comps() > 1 { - killed_vecs.push(ssa); - killed_vec_comps += ssa.comps(); + for src in instr.srcs() { + if let SrcRef::SSA(vec) = src.src_ref { + if vec.comps() > 1 { + let mut vec_killed = true; + for ssa in 
vec.iter() { + if ssa.file() != ra.file() || !avail.contains(ssa) { + vec_killed = false; + break; + } + } + if vec_killed { + for ssa in vec.iter() { + avail.remove(ssa); + } + killed_vecs.push(vec); + } + } } } @@ -465,7 +503,7 @@ fn instr_assign_regs_file( while !killed_vecs.is_empty() { let src = killed_vecs.pop().unwrap(); if src.comps() >= vec_dst.comps { - vec_dst.killed = Some(*src); + vec_dst.killed = Some(src); break; } } @@ -473,7 +511,9 @@ fn instr_assign_regs_file( vec_dsts_map_to_killed_srcs = false; } - if let Some(reg) = ra.try_find_unused_reg(next_dst_reg, vec_dst.comps) { + if let Some(reg) = + ra.try_find_unused_reg_range(next_dst_reg, vec_dst.comps) + { vec_dst.reg = reg; next_dst_reg = reg + vec_dst.comps; } else { @@ -487,76 +527,41 @@ fn instr_assign_regs_file( instr_remap_srcs_file(instr, pcopy, ra); for vec_dst in &mut vec_dsts { - vec_dst.reg = ra.try_get_reg(vec_dst.killed.unwrap()).unwrap(); + vec_dst.reg = ra.try_get_vec_reg(vec_dst.killed.unwrap()).unwrap(); } ra.free_killed(killed); for vec_dst in vec_dsts { let dst = &mut instr.dsts_mut()[vec_dst.dst_idx]; - *dst = ra.assign_reg(*dst.as_ssa().unwrap(), vec_dst.reg).into(); + *dst = ra + .assign_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg) + .into(); } instr_alloc_scalar_dsts_file(instr, ra); } else if could_trivially_allocate { for vec_dst in vec_dsts { let dst = &mut instr.dsts_mut()[vec_dst.dst_idx]; - *dst = ra.assign_reg(*dst.as_ssa().unwrap(), vec_dst.reg).into(); + *dst = ra + .assign_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg) + .into(); } instr_remap_srcs_file(instr, pcopy, ra); ra.free_killed(killed); instr_alloc_scalar_dsts_file(instr, ra); } else { - /* We're all out of tricks. We need to allocate enough space for all - * the vector destinations and all the killed SSA values and shuffle the - * killed values into the new space. - */ - let vec_comps = max(killed_vec_comps, vec_dst_comps); - let vec_reg = ra.get_any_reg(vec_comps); - - let mut ssa_reg = HashMap::new(); - let mut src_vec_reg = vec_reg; - for src in instr.srcs_mut() { - if let SrcRef::SSA(ssa) = src.src_ref { - if ssa.file() == ra.file() { - if killed.contains(&ssa) && ssa.comps() > 1 { - let reg = *ssa_reg.entry(ssa).or_insert_with(|| { - let align = ssa.comps().next_power_of_two(); - let reg = src_vec_reg; - src_vec_reg += ssa.comps(); - /* We assume vector sources are in order of - * decreasing alignment. This is true for texture - * opcodes which should be the only interesting - * case. - */ - assert!(reg % align == 0); - ra.move_to_reg(pcopy, ssa, reg) - }); - src.src_ref = reg.into(); - } - } - } - } - - /* Handle the scalar and not killed sources */ instr_remap_srcs_file(instr, pcopy, ra); - ra.free_killed(killed); - let mut dst_vec_reg = vec_reg; + /* Allocate vector destinations first so we have the most freedom. + * Scalar destinations can fill in holes. + */ for dst in instr.dsts_mut() { if let Dst::SSA(ssa) = dst { - if ssa.comps() > 1 { - let align = ssa.comps().next_power_of_two(); - let reg = dst_vec_reg; - dst_vec_reg += ssa.comps(); - /* We assume vector destinations are in order of decreasing - * alignment. This is true for texture opcodes which should - * be the only interesting case. 
- */ - assert!(reg % align == 0); - *dst = ra.assign_reg(*ssa, reg).into(); + if ssa.file() == ra.file() && ssa.comps() > 1 { + *dst = ra.alloc_vector(pcopy, *ssa).into(); } } } @@ -570,7 +575,7 @@ fn instr_assign_regs_file( #[derive(Clone)] struct RegAllocation { files: [RegFileAllocation; 4], - phi_ssa: HashMap, + phi_ssa: HashMap, } impl RegAllocation { @@ -608,17 +613,21 @@ impl RegAllocation { self.file_mut(ssa.file()).free_ssa(ssa); } + pub fn free_ssa_ref(&mut self, ssa: SSARef) { + self.file_mut(ssa.file()).free_ssa_ref(ssa); + } + pub fn free_killed(&mut self, killed: &KillSet) { for ssa in killed.iter() { self.free_ssa(*ssa); } } - pub fn get_scalar(&mut self, ssa: SSAComp) -> RegRef { + pub fn get_scalar(&mut self, ssa: SSAValue) -> RegRef { self.file_mut(ssa.file()).get_scalar(ssa) } - pub fn alloc_scalar(&mut self, ssa: SSAComp) -> RegRef { + pub fn alloc_scalar(&mut self, ssa: SSAValue) -> RegRef { self.file_mut(ssa.file()).alloc_scalar(ssa) } } @@ -626,7 +635,7 @@ impl RegAllocation { struct AssignRegsBlock { ra: RegAllocation, live_in: Vec, - phi_out: HashMap, + phi_out: HashMap, } impl AssignRegsBlock { @@ -638,58 +647,6 @@ impl AssignRegsBlock { } } - fn assign_regs_split( - &mut self, - split: &OpSplit, - killed: &KillSet, - pcopy: &mut OpParCopy, - ) { - let src = split.src.src_ref.as_ssa().unwrap(); - let comps = src.comps(); - assert!(usize::from(comps) == split.dsts.len()); - - let mut coalesced = BitSet::new(); - if killed.contains(src) { - for c in 0..comps { - /* Feee the component regardless of any dest checks */ - let src_ra = self.ra.file_mut(src.file()); - let reg = src_ra.free_ssa_comp(src.comp(c)); - let src_ref = RegRef::new(src.file(), reg, 1); - - /* If we have an OpSplit which kills its source, we can coalesce - * on the spot into the destinations. - */ - if let Dst::SSA(dst) = &split.dsts[usize::from(c)] { - if dst.file() == src.file() { - /* Assign destinations to source components when the - * register files match. - */ - let dst_ra = src_ra; - dst_ra.assign_reg_comp(dst.as_comp(), reg); - coalesced.insert(c.into()); - } else { - /* Otherwise, they come from different files so - * allocating a destination register won't affect the - * source and it's okay to alloc before we've finished - * freeing the source. 
- */ - let dst_ra = self.ra.file_mut(dst.file()); - let dst_ref = dst_ra.alloc_scalar(dst.as_comp()); - pcopy.srcs.push(src_ref.into()); - pcopy.dsts.push(dst_ref.into()); - } - } - } - } else { - for c in 0..comps { - if let Dst::SSA(dst) = &split.dsts[usize::from(c)] { - pcopy.srcs.push(self.ra.get_scalar(src.comp(c)).into()); - pcopy.dsts.push(self.ra.alloc_scalar(dst.as_comp()).into()); - } - } - } - } - fn assign_regs_instr( &mut self, mut instr: Instr, @@ -697,22 +654,15 @@ impl AssignRegsBlock { pcopy: &mut OpParCopy, ) -> Option { match &instr.op { - Op::Split(split) => { - assert!(instr.pred.is_none()); - assert!(split.src.src_mod.is_none()); - self.assign_regs_split(split, killed, pcopy); - None - } Op::PhiSrcs(phi) => { for (id, src) in phi.iter() { assert!(src.src_mod.is_none()); if let SrcRef::SSA(ssa) = src.src_ref { - for c in 0..ssa.comps() { - let src = self.ra.get_scalar(ssa.comp(c)).into(); - self.phi_out.insert(PhiComp::new(*id, 0), src); - } + assert!(ssa.comps() == 1); + let src = self.ra.get_scalar(ssa[0]).into(); + self.phi_out.insert(*id, src); } else { - self.phi_out.insert(PhiComp::new(*id, 0), src.src_ref); + self.phi_out.insert(*id, src.src_ref); } } self.ra.free_killed(killed); @@ -723,12 +673,11 @@ impl AssignRegsBlock { for (id, dst) in phi.iter() { if let Dst::SSA(ssa) = dst { - for c in 0..ssa.comps() { - self.live_in.push(LiveValue { - live_ref: LiveRef::Phi(PhiComp::new(*id, c)), - reg_ref: self.ra.alloc_scalar(ssa.as_comp()), - }); - } + assert!(ssa.comps() == 1); + self.live_in.push(LiveValue { + live_ref: LiveRef::Phi(*id), + reg_ref: self.ra.alloc_scalar(ssa[0]), + }); } } @@ -748,9 +697,9 @@ impl AssignRegsBlock { * live in when we process the OpPhiDst, if any. */ for raf in &self.ra.files { - for (comp, reg) in &raf.ssa_reg { + for (ssa, reg) in &raf.ssa_reg { self.live_in.push(LiveValue { - live_ref: LiveRef::SSA(*comp), + live_ref: LiveRef::SSA(*ssa), reg_ref: RegRef::new(raf.file(), *reg, 1), }); } @@ -768,7 +717,7 @@ impl AssignRegsBlock { } } for src in instr.srcs() { - if let SrcRef::SSA(ssa) = &src.src_ref { + for ssa in src.iter_ssa() { if !bl.is_live_after(ssa, ip) { killed.insert(*ssa); } @@ -800,7 +749,7 @@ impl AssignRegsBlock { for lv in &target.live_in { let src = match lv.live_ref { LiveRef::SSA(ssa) => { - let reg = self.ra.file(ssa.file()).get_reg_comp(ssa); + let reg = self.ra.file(ssa.file()).get_reg(ssa); SrcRef::from(RegRef::new(ssa.file(), reg, 1)) } LiveRef::Phi(phi) => *self.phi_out.get(&phi).unwrap(), @@ -874,143 +823,3 @@ impl Shader { } } } - -struct TrivialRegAlloc { - next_reg: u8, - next_ureg: u8, - next_pred: u8, - next_upred: u8, - reg_map: HashMap, - phi_map: HashMap, -} - -impl TrivialRegAlloc { - pub fn new() -> TrivialRegAlloc { - TrivialRegAlloc { - next_reg: 16, /* Leave some space for FS outputs */ - next_ureg: 0, - next_pred: 0, - next_upred: 0, - reg_map: HashMap::new(), - phi_map: HashMap::new(), - } - } - - fn alloc_reg(&mut self, file: RegFile, comps: u8) -> RegRef { - let align = comps.next_power_of_two(); - let idx = match file { - RegFile::GPR => { - let idx = self.next_reg.next_multiple_of(align); - self.next_reg = idx + comps; - idx - } - RegFile::UGPR => { - let idx = self.next_ureg.next_multiple_of(align); - self.next_ureg = idx + comps; - idx - } - RegFile::Pred => { - let idx = self.next_pred.next_multiple_of(align); - self.next_pred = idx + comps; - idx - } - RegFile::UPred => { - let idx = self.next_upred.next_multiple_of(align); - self.next_upred = idx + comps; - idx - } - }; - 
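
[Aside: a minimal sketch, with simplified types rather than the NAK RegFileAllocation itself, of the aligned scan that try_find_unpinned_reg_range performs above: a vector of N components is placed at a register aligned to N.next_power_of_two(), and any candidate range that overlaps a pinned register is skipped.]

fn find_unpinned_range(pinned: &[bool], start_reg: u8, comps: u8) -> Option<u8> {
    // Vectors must start at a register aligned to their power-of-two size.
    let align = comps.next_power_of_two();
    let mut reg = start_reg.next_multiple_of(align);
    while usize::from(reg) + usize::from(comps) <= pinned.len() {
        let range = usize::from(reg)..usize::from(reg) + usize::from(comps);
        if pinned[range].iter().all(|p| !*p) {
            return Some(reg);
        }
        reg += align;
    }
    None
}

fn main() {
    let mut pinned = vec![false; 16];
    pinned[4] = true; // r4 is pinned, so a vec2 cannot land on r4..r5
    assert_eq!(find_unpinned_range(&pinned, 0, 2), Some(0));
    assert_eq!(find_unpinned_range(&pinned, 4, 2), Some(6));
    assert_eq!(find_unpinned_range(&pinned, 0, 4), Some(0));
}
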
RegRef::new(file, idx, comps) - } - - fn alloc_ssa(&mut self, ssa: SSAValue) -> RegRef { - let reg = self.alloc_reg(ssa.file(), ssa.comps()); - let old = self.reg_map.insert(ssa, reg); - assert!(old.is_none()); - reg - } - - fn get_ssa_reg(&self, ssa: SSAValue) -> RegRef { - *self.reg_map.get(&ssa).unwrap() - } - - fn map_src(&self, mut src: Src) -> Src { - if let SrcRef::SSA(ssa) = src.src_ref { - src.src_ref = self.get_ssa_reg(ssa).into(); - } - src - } - - pub fn do_alloc(&mut self, s: &mut Shader) { - for f in &mut s.functions { - for b in &mut f.blocks { - for instr in &mut b.instrs { - match &instr.op { - Op::PhiDsts(phi) => { - let mut pcopy = OpParCopy::new(); - - assert!(phi.ids.len() == phi.dsts.len()); - for (id, dst) in phi.iter() { - let dst_ssa = dst.as_ssa().unwrap(); - let dst_reg = self.alloc_ssa(*dst_ssa); - let src_reg = self - .alloc_reg(dst_ssa.file(), dst_ssa.comps()); - self.phi_map.insert(*id, src_reg); - pcopy.srcs.push(src_reg.into()); - pcopy.dsts.push(dst_reg.into()); - } - - instr.op = Op::ParCopy(pcopy); - } - _ => (), - } - } - } - } - - for f in &mut s.functions { - for b in &mut f.blocks { - for instr in &mut b.instrs { - match &instr.op { - Op::PhiSrcs(phi) => { - assert!(phi.ids.len() == phi.srcs.len()); - instr.op = Op::ParCopy(OpParCopy { - srcs: phi - .srcs - .iter() - .map(|src| self.map_src(*src)) - .collect(), - dsts: phi - .ids - .iter() - .map(|id| { - (*self.phi_map.get(id).unwrap()).into() - }) - .collect(), - }); - } - _ => { - if let Pred::SSA(ssa) = instr.pred { - instr.pred = self.get_ssa_reg(ssa).into(); - } - for dst in instr.dsts_mut() { - if let Dst::SSA(ssa) = dst { - *dst = self.alloc_ssa(*ssa).into(); - } - } - for src in instr.srcs_mut() { - *src = self.map_src(*src); - } - } - } - } - } - } - } -} - -impl Shader { - pub fn assign_regs_trivial(&mut self) { - TrivialRegAlloc::new().do_alloc(self); - } -} diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index 92ad70bb703..f8dda9bb540 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -22,8 +22,9 @@ struct ShaderFromNir<'a> { instrs: Vec, fs_out_regs: Vec, end_block_id: u32, + ssa_map: HashMap>, num_phis: u32, - phis: HashMap, + phi_map: HashMap<(u32, u8), u32>, } impl<'a> ShaderFromNir<'a> { @@ -41,32 +42,46 @@ impl<'a> ShaderFromNir<'a> { instrs: Vec::new(), fs_out_regs: fs_out_regs, end_block_id: 0, + ssa_map: HashMap::new(), num_phis: 0, - phis: HashMap::new(), + phi_map: HashMap::new(), } } - pub fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSAValue { - self.func.as_mut().unwrap().ssa_alloc.alloc(file, comps) + fn alloc_ssa(&mut self, file: RegFile) -> SSAValue { + self.func.as_mut().unwrap().ssa_alloc.alloc(file) } - fn get_ssa(&self, def: &nir_def) -> SSAValue { - if def.bit_size == 1 { - SSAValue::new(RegFile::Pred, def.index, def.num_components) - } else { - assert!(def.bit_size == 32 || def.bit_size == 64); - let dwords = (def.bit_size / 32) * def.num_components; - //Src::new_ssa(def.index, dwords, !def.divergent) - SSAValue::new(RegFile::GPR, def.index, dwords) + fn get_ssa(&mut self, def: &nir_def) -> &[SSAValue] { + self.ssa_map.entry(def.index).or_insert_with(|| { + let (file, comps) = if def.bit_size == 1 { + (RegFile::Pred, def.num_components) + } else { + assert!(def.bit_size == 32 || def.bit_size == 64); + let comps = (def.bit_size / 32) * def.num_components; + (RegFile::GPR, comps) + }; + let mut vec = Vec::new(); + for i in 0..comps { + 
vec.push(self.func.as_mut().unwrap().ssa_alloc.alloc(file)) + } + vec + }) + } + + fn get_ssa_comp(&mut self, def: &nir_def, c: u8) -> SSARef { + let vec = self.get_ssa(def); + match def.bit_size { + 1 | 32 => vec[usize::from(c)].into(), + 64 => [vec[usize::from(c) * 2], vec[usize::from(c) * 2 + 1]].into(), + _ => panic!("Unsupported bit size"), } } - fn get_src(&self, src: &nir_src) -> Src { - self.get_ssa(&src.as_def()).into() - } - - fn get_dst(&self, dst: &nir_def) -> Dst { - self.get_ssa(dst).into() + fn get_src(&mut self, src: &nir_src) -> Src { + SSARef::try_from(self.get_ssa(&src.as_def())) + .unwrap() + .into() } fn get_io_addr_offset( @@ -81,85 +96,70 @@ impl<'a> ShaderFromNir<'a> { if let Some(base_def) = std::ptr::NonNull::new(addr_offset.base.def) { let base_def = unsafe { base_def.as_ref() }; - assert!(addr_offset.base.comp == 0); - let base = self.get_ssa(base_def); + let base_comp = u8::try_from(addr_offset.base.comp).unwrap(); + let base = self.get_ssa_comp(base_def, base_comp); (base.into(), addr_offset.offset) } else { (SrcRef::Zero.into(), addr_offset.offset) } } - fn get_alu_src(&mut self, alu_src: &nir_alu_src) -> Src { - if alu_src.src.num_components() == 1 { - self.get_src(&alu_src.src) - } else { - assert!(alu_src.src.bit_size() == 32); - let vec_src = self.get_src(&alu_src.src); - let comp = - self.alloc_ssa(vec_src.src_ref.as_ssa().unwrap().file(), 1); - let mut dsts = Vec::new(); - for c in 0..alu_src.src.num_components() { - if c == alu_src.swizzle[0] { - dsts.push(comp.into()); - } else { - dsts.push(Dst::None); - } - } - self.instrs.push(Instr::new_split(&dsts, vec_src)); - comp.into() - } + fn get_dst(&mut self, dst: &nir_def) -> Dst { + SSARef::try_from(self.get_ssa(dst)).unwrap().into() } - fn get_phi_id(&mut self, phi: &nir_phi_instr) -> u32 { - match self.phis.get(&phi.def.index) { - Some(id) => *id, - None => { - let id = self.num_phis; - self.num_phis += 1; - self.phis.insert(phi.def.index, id); - id - } - } - } - - fn split64(&mut self, ssa: SSAValue) -> [SSAValue; 2] { - assert!(ssa.comps() == 2); - let split = - [self.alloc_ssa(ssa.file(), 1), self.alloc_ssa(ssa.file(), 1)]; - let dsts = [split[0].into(), split[1].into()]; - self.instrs.push(Instr::new_split(&dsts, ssa.into())); - split - } - - fn split(&mut self, ssa: SSAValue) -> Vec { - if ssa.comps() == 1 { - return vec![ssa]; - } - - let mut split_ssa = Vec::new(); - let mut dsts = Vec::new(); - for c in 0..ssa.comps() { - let dst_ssa = self.alloc_ssa(ssa.file(), 1); - split_ssa.push(dst_ssa); - dsts.push(Dst::from(dst_ssa)); - } - self.instrs.push(Instr::new_split(&dsts, ssa.into())); - split_ssa - } - - fn vec(&mut self, ssa: &[SSAValue]) -> SSAValue { - let dst = self.alloc_ssa(ssa[0].file(), ssa.len().try_into().unwrap()); - let srcs: Vec = ssa.iter().map(|s| (*s).into()).collect(); - self.instrs.push(Instr::new_vec(dst.into(), &srcs)); - dst + fn get_phi_id(&mut self, phi: &nir_phi_instr, comp: u8) -> u32 { + let ssa = phi.def.as_def(); + *self.phi_map.entry((ssa.index, comp)).or_insert_with(|| { + let id = self.num_phis; + self.num_phis += 1; + id + }) } fn parse_alu(&mut self, alu: &nir_alu_instr) { let mut srcs = Vec::new(); - for alu_src in alu.srcs_as_slice() { - srcs.push(self.get_alu_src(alu_src)); + for (i, alu_src) in alu.srcs_as_slice().iter().enumerate() { + let bit_size = alu_src.src.bit_size(); + let comps = alu.src_components(i.try_into().unwrap()); + + let alu_src_ssa = self.get_ssa(&alu_src.src.as_def()); + let mut src_comps = Vec::new(); + for c in 0..comps { + let s = 
usize::from(alu_src.swizzle[usize::from(c)]); + if bit_size == 1 || bit_size == 32 { + src_comps.push(alu_src_ssa[s]); + } else if bit_size == 64 { + src_comps.push(alu_src_ssa[s * 2]); + src_comps.push(alu_src_ssa[s * 2 + 1]); + } else { + panic!("Unhandled bit size"); + } + } + srcs.push(Src::from(SSARef::try_from(src_comps).unwrap())); + } + + /* Handle vectors as a special case since they're the only ALU ops that + * can produce more than a 16B of data. + */ + match alu.op { + nir_op_mov | nir_op_vec2 | nir_op_vec3 | nir_op_vec4 + | nir_op_vec5 | nir_op_vec8 | nir_op_vec16 => { + let mut pcopy = OpParCopy::new(); + for src in srcs { + for v in src.as_ssa().unwrap().iter() { + pcopy.srcs.push((*v).into()); + } + } + for v in self.get_ssa(&alu.def.as_def()) { + pcopy.dsts.push((*v).into()); + } + assert!(pcopy.srcs.len() == pcopy.dsts.len()); + self.instrs.push(Instr::new(Op::ParCopy(pcopy))); + return; + } + _ => (), } - let srcs = srcs; let dst = self.get_dst(&alu.def); @@ -293,7 +293,7 @@ impl<'a> ShaderFromNir<'a> { }))); } nir_op_fsign => { - let lz = self.alloc_ssa(RegFile::GPR, 1); + let lz = self.alloc_ssa(RegFile::GPR); self.instrs.push(Instr::new_fset( lz.into(), FloatCmpOp::OrdLt, @@ -301,7 +301,7 @@ impl<'a> ShaderFromNir<'a> { Src::new_zero(), )); - let gz = self.alloc_ssa(RegFile::GPR, 1); + let gz = self.alloc_ssa(RegFile::GPR); self.instrs.push(Instr::new_fset( gz.into(), FloatCmpOp::OrdGt, @@ -332,15 +332,12 @@ impl<'a> ShaderFromNir<'a> { }))); } nir_op_iadd => { - if alu.def.bit_size() == 64 { - let x = self.split64(*srcs[0].as_ssa().unwrap()); - let y = self.split64(*srcs[1].as_ssa().unwrap()); - let carry = self.alloc_ssa(RegFile::Pred, 1); + if alu.def.bit_size == 64 { + let x = srcs[0].as_ssa().unwrap(); + let y = srcs[1].as_ssa().unwrap(); + let sum = dst.as_ssa().unwrap(); + let carry = self.alloc_ssa(RegFile::Pred); - let sum = [ - self.alloc_ssa(dst.as_ssa().unwrap().file(), 1), - self.alloc_ssa(dst.as_ssa().unwrap().file(), 1), - ]; self.instrs.push(Instr::new(Op::IAdd3(OpIAdd3 { dst: sum[0].into(), overflow: carry.into(), @@ -353,9 +350,6 @@ impl<'a> ShaderFromNir<'a> { srcs: [x[1].into(), y[1].into(), Src::new_zero()], carry: carry.into(), }))); - - let sum = [sum[0].into(), sum[1].into()]; - self.instrs.push(Instr::new_vec(dst, &sum)); } else { self.instrs.push(Instr::new_iadd(dst, srcs[0], srcs[1])); } @@ -458,14 +452,13 @@ impl<'a> ShaderFromNir<'a> { }))); } nir_op_imul_high | nir_op_umul_high => { - let dst64 = self.alloc_ssa(RegFile::GPR, 2); + let dst_hi = dst.as_ssa().unwrap()[0]; + let dst_lo = self.alloc_ssa(RegFile::GPR); self.instrs.push(Instr::new(Op::IMad64(OpIMad64 { - dst: dst64.into(), + dst: [dst_lo, dst_hi].into(), srcs: [srcs[0], srcs[1], Src::new_zero()], signed: alu.op == nir_op_imul_high, }))); - self.instrs - .push(Instr::new_split(&[Dst::None, dst], dst64.into())); } nir_op_ineg => { self.instrs.push(Instr::new(Op::IMov(OpIMov { @@ -539,7 +532,11 @@ impl<'a> ShaderFromNir<'a> { self.instrs.push(Instr::new_mov(dst, srcs[0])); } nir_op_pack_64_2x32_split => { - self.instrs.push(Instr::new_vec(dst, &[srcs[0], srcs[1]])); + let dst_ssa = dst.as_ssa().unwrap(); + let mut pcopy = OpParCopy::new(); + pcopy.push(srcs[0], dst_ssa[0].into()); + pcopy.push(srcs[1], dst_ssa[1].into()); + self.instrs.push(Instr::new(Op::ParCopy(pcopy))); } nir_op_u2f32 => { self.instrs.push(Instr::new_u2f(dst, srcs[0])); @@ -563,12 +560,12 @@ impl<'a> ShaderFromNir<'a> { )); } nir_op_unpack_64_2x32_split_x => { - self.instrs - .push(Instr::new_split(&[dst, 
Dst::None], srcs[0])); + let src0_x = srcs[0].as_ssa().unwrap()[0]; + self.instrs.push(Instr::new_mov(dst, src0_x.into())); } nir_op_unpack_64_2x32_split_y => { - self.instrs - .push(Instr::new_split(&[Dst::None, dst], srcs[0])); + let src0_y = srcs[0].as_ssa().unwrap()[1]; + self.instrs.push(Instr::new_mov(dst, src0_y.into())); } nir_op_ushr => { self.instrs.push(Instr::new(Op::Shf(OpShf { @@ -582,9 +579,6 @@ impl<'a> ShaderFromNir<'a> { dst_high: false, }))); } - nir_op_vec2 | nir_op_vec3 | nir_op_vec4 => { - self.instrs.push(Instr::new_vec(dst, &srcs)); - } _ => panic!("Unsupported ALU instruction: {}", alu.info().name()), } } @@ -644,24 +638,38 @@ impl<'a> ShaderFromNir<'a> { let mask = tex.def.components_read(); let mask = u8::try_from(mask).unwrap(); - let dst_comps = u8::try_from(mask.count_ones()).unwrap(); + let tex_dst = *self.get_dst(&tex.def).as_ssa().unwrap(); + let mut dst_comps = Vec::new(); + for (i, comp) in tex_dst.iter().enumerate() { + if mask & (1 << i) == 0 { + self.instrs + .push(Instr::new_mov((*comp).into(), Src::new_zero())); + } else { + dst_comps.push(*comp); + } + } + let mut dsts = [Dst::None; 2]; - dsts[0] = self.alloc_ssa(RegFile::GPR, min(dst_comps, 2)).into(); - if dst_comps > 2 { - dsts[1] = self.alloc_ssa(RegFile::GPR, dst_comps - 2).into(); + dsts[0] = SSARef::try_from(&dst_comps[..min(dst_comps.len(), 2)]) + .unwrap() + .into(); + if dst_comps.len() > 2 { + dsts[1] = SSARef::try_from(&dst_comps[2..]).unwrap().into(); } if tex.op == nir_texop_hdr_dim_nv { + let src = self.get_src(&srcs[0].src); self.instrs.push(Instr::new(Op::Txq(OpTxq { dsts: dsts, - src: self.get_src(&srcs[0].src), + src: src, query: TexQuery::Dimension, mask: mask, }))); } else if tex.op == nir_texop_tex_type_nv { + let src = self.get_src(&srcs[0].src); self.instrs.push(Instr::new(Op::Txq(OpTxq { dsts: dsts, - src: self.get_src(&srcs[0].src), + src: src, query: TexQuery::TextureType, mask: mask, }))); @@ -742,22 +750,6 @@ impl<'a> ShaderFromNir<'a> { }))); } } - - let mut dst_comps = Vec::new(); - for dst in dsts { - if let Dst::SSA(dst) = dst { - for comp in self.split(dst) { - dst_comps.push(comp.into()); - } - } - } - for c in 0..tex.def.num_components() { - if mask & (1 << c) == 0 { - dst_comps.insert(c.into(), SrcRef::Zero.into()); - } - } - self.instrs - .push(Instr::new_vec(self.get_dst(&tex.def), &dst_comps)); } fn parse_intrinsic(&mut self, intrin: &nir_intrinsic_instr) { @@ -797,21 +789,16 @@ impl<'a> ShaderFromNir<'a> { nir_intrinsic_load_barycentric_pixel => InterpLoc::Default, _ => panic!("Unsupported interp mode"), }; - let dst = self.get_dst(&intrin.def); - - let mut comps = Vec::new(); + let dst = *self.get_dst(&intrin.def).as_ssa().unwrap(); for c in 0..intrin.num_components { - let tmp = self.alloc_ssa(RegFile::GPR, 1); self.instrs.push(Instr::new(Op::Ipa(OpIpa { - dst: tmp.into(), - addr: addr + 4 * u16::try_from(c).unwrap(), + dst: dst[usize::from(c)].into(), + addr: addr + 4 * u16::from(c), freq: freq, loc: loc, offset: SrcRef::Zero.into(), }))); - comps.push(tmp.into()); } - self.instrs.push(Instr::new_vec(dst, &comps)); } nir_intrinsic_load_per_vertex_input => { let addr = u16::try_from(intrin.base()).unwrap(); @@ -828,21 +815,20 @@ impl<'a> ShaderFromNir<'a> { nir_intrinsic_load_ubo => { let idx = srcs[0]; let offset = srcs[1]; - let dst = self.get_dst(&intrin.def); - let dwords = - (intrin.def.bit_size() / 32) * intrin.def.num_components(); + let dst = *self.get_dst(&intrin.def).as_ssa().unwrap(); if let Some(imm_idx) = idx.as_uint() { let imm_idx = 
u8::try_from(imm_idx).unwrap(); if let Some(imm_offset) = offset.as_uint() { let imm_offset = u16::try_from(imm_offset).unwrap(); - let mut srcs = Vec::new(); - for i in 0..dwords { - srcs.push(Src::new_cbuf( + let mut pcopy = OpParCopy::new(); + for (i, dst) in dst.iter().enumerate() { + let src = Src::new_cbuf( imm_idx, - imm_offset + u16::from(i) * 4, - )); + imm_offset + u16::try_from(i).unwrap() * 4, + ); + pcopy.push(src, (*dst).into()); } - self.instrs.push(Instr::new_vec(dst, &srcs)); + self.instrs.push(Instr::new(Op::ParCopy(pcopy))); } else { panic!("Indirect UBO offsets not yet supported"); } @@ -869,16 +855,13 @@ impl<'a> ShaderFromNir<'a> { /* We assume these only ever happen in the last block. * This is ensured by nir_lower_io_to_temporaries() */ - let data = self.get_src(&srcs[0]); + let data = *self.get_src(&srcs[0]).as_ssa().unwrap(); assert!(srcs[1].is_zero()); let base: u8 = intrin.base().try_into().unwrap(); - let mut dsts = Vec::new(); for c in 0..intrin.num_components { - let tmp = self.alloc_ssa(RegFile::GPR, 1); - self.fs_out_regs[(base + c) as usize] = tmp.into(); - dsts.push(Dst::from(tmp)); + self.fs_out_regs[usize::from(base + c)] = + data[usize::from(c)].into(); } - self.instrs.push(Instr::new_split(&dsts, data)) } else { let data = self.get_src(&srcs[0]); let vtx = Src::new_zero(); @@ -895,23 +878,35 @@ impl<'a> ShaderFromNir<'a> { } fn parse_load_const(&mut self, load_const: &nir_load_const_instr) { - let dst = self.get_dst(&load_const.def); - let mut srcs = Vec::new(); + fn src_for_u32(u: u32) -> Src { + if u == 0 { + Src::new_zero() + } else { + Src::new_imm_u32(u) + } + } + + let mut pcopy = OpParCopy::new(); for c in 0..load_const.def.num_components { if load_const.def.bit_size == 1 { let imm_b1 = unsafe { load_const.values()[c as usize].b }; - srcs.push(Src::new_imm_bool(imm_b1)); - } else { - assert!(load_const.def.bit_size == 32); + pcopy.srcs.push(Src::new_imm_bool(imm_b1)); + } else if load_const.def.bit_size == 32 { let imm_u32 = unsafe { load_const.values()[c as usize].u32_ }; - srcs.push(if imm_u32 == 0 { - Src::new_zero() - } else { - Src::new_imm_u32(imm_u32) - }); + pcopy.srcs.push(src_for_u32(imm_u32)); + } else if load_const.def.bit_size == 64 { + let imm_u64 = unsafe { load_const.values()[c as usize].u64_ }; + pcopy.srcs.push(src_for_u32(imm_u64 as u32)); + pcopy.srcs.push(src_for_u32((imm_u64 >> 32) as u32)); } } - self.instrs.push(Instr::new_vec(dst, &srcs)); + + for sv in self.get_ssa(&load_const.def) { + pcopy.dsts.push((*sv).into()); + } + + assert!(pcopy.srcs.len() == pcopy.dsts.len()); + self.instrs.push(Instr::new(Op::ParCopy(pcopy))); } fn parse_undef(&mut self, _ssa_undef: &nir_undef_instr) { @@ -919,16 +914,15 @@ impl<'a> ShaderFromNir<'a> { } fn parse_block(&mut self, nb: &nir_block) { - let mut phi = OpPhiDsts { - ids: Vec::new(), - dsts: Vec::new(), - }; - + let mut phi = OpPhiDsts::new(); for ni in nb.iter_instr_list() { if ni.type_ == nir_instr_type_phi { let np = ni.as_phi().unwrap(); - phi.ids.push(self.get_phi_id(np)); - phi.dsts.push(self.get_dst(&np.def)); + let dst = *self.get_dst(&np.def).as_ssa().unwrap(); + for (i, dst) in dst.iter().enumerate() { + let phi_id = self.get_phi_id(np, i.try_into().unwrap()); + phi.push(phi_id, (*dst).into()); + } } else { break; } @@ -964,10 +958,7 @@ impl<'a> ShaderFromNir<'a> { None => continue, }; - let mut phi = OpPhiSrcs { - srcs: Vec::new(), - ids: Vec::new(), - }; + let mut phi = OpPhiSrcs::new(); for i in sb.iter_instr_list() { let np = match i.as_phi() { @@ -977,8 +968,12 @@ 
impl<'a> ShaderFromNir<'a> { for ps in np.iter_srcs() { if ps.pred().index == nb.index { - phi.srcs.push(self.get_src(&ps.src)); - phi.ids.push(self.get_phi_id(np)); + let src = *self.get_src(&ps.src).as_ssa().unwrap(); + for (i, src) in src.iter().enumerate() { + let phi_id = + self.get_phi_id(np, i.try_into().unwrap()); + phi.push(phi_id, (*src).into()); + } break; } } @@ -1007,7 +1002,7 @@ impl<'a> ShaderFromNir<'a> { } fn parse_if(&mut self, ni: &nir_if) { - let cond = self.get_ssa(&ni.condition.as_def()); + let cond = self.get_ssa(&ni.condition.as_def())[0]; let if_bra = self.blocks.last_mut().unwrap().branch_mut().unwrap(); if_bra.pred = cond.into(); @@ -1040,7 +1035,7 @@ impl<'a> ShaderFromNir<'a> { } pub fn parse_function_impl(&mut self, nfi: &nir_function_impl) -> Function { - self.func = Some(Function::new(0, nfi.ssa_alloc)); + self.func = Some(Function::new(0)); self.end_block_id = nfi.end_block().index; self.parse_cf_list(nfi.iter_body()); diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index c5a8daa4be8..68f2fec9067 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -8,7 +8,7 @@ extern crate nak_ir_proc; use nak_ir_proc::*; use std::fmt; use std::iter::Zip; -use std::ops::{BitAnd, BitOr, Not, Range}; +use std::ops::{BitAnd, BitOr, Deref, DerefMut, Not, Range}; use std::slice; #[repr(u8)] @@ -111,32 +111,23 @@ pub struct SSAValue { } impl SSAValue { - pub fn new(file: RegFile, idx: u32, comps: u8) -> SSAValue { - assert!(idx < (1 << 27)); + pub const NONE: Self = SSAValue { packed: 0 }; + + pub fn new(file: RegFile, idx: u32) -> SSAValue { + /* Reserve 2 numbers for use for SSARef::comps() */ + assert!(idx > 0 && idx < (1 << 30) - 2); let mut packed = idx; - assert!(comps > 0 && comps <= 8); - packed |= u32::from(comps - 1) << 27; assert!(u8::from(file) < 4); packed |= u32::from(u8::from(file)) << 30; SSAValue { packed: packed } } pub fn idx(&self) -> u32 { - self.packed & 0x07ffffff + self.packed & 0x3fffffff } - pub fn comps(&self) -> u8 { - (((self.packed >> 27) & 0x7) + 1).try_into().unwrap() - } - - pub fn comp(&self, comp: u8) -> SSAComp { - assert!(comp < self.comps()); - SSAComp::new(self.file(), self.idx(), comp) - } - - pub fn as_comp(&self) -> SSAComp { - assert!(self.comps() == 1); - SSAComp::new(self.file(), self.idx(), 0) + pub fn is_none(&self) -> bool { + self.packed == 0 } } @@ -148,38 +139,125 @@ impl HasRegFile for SSAValue { impl fmt::Display for SSAValue { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_uniform() { - write!(f, "USSA{}@{}", self.idx(), self.comps()) - } else { - write!(f, "SSA{}@{}", self.idx(), self.comps()) + match self.file() { + RegFile::GPR => write!(f, "S")?, + RegFile::UGPR => write!(f, "US")?, + RegFile::Pred => write!(f, "PS")?, + RegFile::UPred => write!(f, "UPS")?, } + write!(f, "{}", self.idx()) } } #[derive(Clone, Copy, Eq, Hash, PartialEq)] -pub struct SSAComp { - v: SSAValue, +pub struct SSARef { + v: [SSAValue; 4], } -impl SSAComp { - pub fn new(file: RegFile, idx: u32, comp: u8) -> SSAComp { - SSAComp { - v: SSAValue::new(file, idx, comp + 1), +impl SSARef { + #[inline] + fn new(comps: &[SSAValue]) -> SSARef { + assert!(comps.len() > 0 && comps.len() <= 4); + let mut r = SSARef { + v: [SSAValue::NONE; 4], + }; + for i in 0..comps.len() { + r.v[i] = comps[i]; + } + if comps.len() < 4 { + r.v[3].packed = (comps.len() as u32).wrapping_neg(); + } + r + } + + pub fn comps(&self) -> u8 { + if self.v[3].packed >= u32::MAX - 2 { + 
self.v[3].packed.wrapping_neg() as u8 + } else { + 4 } } +} - pub fn idx(&self) -> u32 { - self.v.idx() - } - - pub fn comp(&self) -> u8 { - self.v.comps() - 1 +impl HasRegFile for SSARef { + fn file(&self) -> RegFile { + let comps = usize::from(self.comps()); + for i in 1..comps { + assert!(self.v[i].file() == self.v[0].file()); + } + self.v[0].file() } } -impl HasRegFile for SSAComp { - fn file(&self) -> RegFile { - self.v.file() +impl Deref for SSARef { + type Target = [SSAValue]; + + fn deref(&self) -> &[SSAValue] { + let comps = usize::from(self.comps()); + &self.v[..comps] + } +} + +impl DerefMut for SSARef { + fn deref_mut(&mut self) -> &mut [SSAValue] { + let comps = usize::from(self.comps()); + &mut self.v[..comps] + } +} + +impl TryFrom<&[SSAValue]> for SSARef { + type Error = &'static str; + + fn try_from(comps: &[SSAValue]) -> Result { + if comps.len() == 0 { + Err("Empty vector") + } else if comps.len() > 4 { + Err("Too many vector components") + } else { + Ok(SSARef::new(comps)) + } + } +} + +impl TryFrom> for SSARef { + type Error = &'static str; + + fn try_from(comps: Vec) -> Result { + SSARef::try_from(&comps[..]) + } +} + +macro_rules! impl_ssa_ref_from_arr { + ($n: expr) => { + impl From<[SSAValue; $n]> for SSARef { + fn from(comps: [SSAValue; $n]) -> Self { + SSARef::new(&comps[..]) + } + } + }; +} +impl_ssa_ref_from_arr!(1); +impl_ssa_ref_from_arr!(2); +impl_ssa_ref_from_arr!(3); +impl_ssa_ref_from_arr!(4); + +impl From for SSARef { + fn from(val: SSAValue) -> Self { + [val].into() + } +} + +impl fmt::Display for SSARef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.comps() == 1 { + write!(f, "{}", self[0]) + } else { + write!(f, "{{")?; + for v in self.iter() { + write!(f, " {}", v)?; + } + write!(f, " }}") + } } } @@ -188,20 +266,17 @@ pub struct SSAValueAllocator { } impl SSAValueAllocator { - pub fn new(initial_count: u32) -> SSAValueAllocator { - SSAValueAllocator { - count: initial_count, - } + pub fn new() -> SSAValueAllocator { + SSAValueAllocator { count: 0 } } pub fn count(&self) -> u32 { self.count } - pub fn alloc(&mut self, file: RegFile, comps: u8) -> SSAValue { - let idx = self.count; + pub fn alloc(&mut self, file: RegFile) -> SSAValue { self.count += 1; - SSAValue::new(file, idx, comps) + SSAValue::new(file, self.count) } } @@ -282,7 +357,7 @@ impl fmt::Display for RegRef { #[derive(Clone, Copy)] pub enum Dst { None, - SSA(SSAValue), + SSA(SSARef), Reg(RegRef), } @@ -294,7 +369,7 @@ impl Dst { } } - pub fn as_ssa(&self) -> Option<&SSAValue> { + pub fn as_ssa(&self) -> Option<&SSARef> { match self { Dst::SSA(r) => Some(r), _ => None, @@ -308,9 +383,9 @@ impl From for Dst { } } -impl From for Dst { - fn from(ssa: SSAValue) -> Dst { - Dst::SSA(ssa) +impl> From for Dst { + fn from(ssa: T) -> Dst { + Dst::SSA(ssa.into()) } } @@ -345,7 +420,7 @@ pub enum SrcRef { False, Imm32(u32), CBuf(CBufRef), - SSA(SSAValue), + SSA(SSARef), Reg(RegRef), } @@ -357,7 +432,7 @@ impl SrcRef { } } - pub fn as_ssa(&self) -> Option<&SSAValue> { + pub fn as_ssa(&self) -> Option<&SSARef> { match self { SrcRef::SSA(r) => Some(r), _ => None, @@ -379,19 +454,20 @@ impl SrcRef { } } - pub fn get_ssa(&self) -> Option<&SSAValue> { + pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> { match self { SrcRef::Zero | SrcRef::True | SrcRef::False | SrcRef::Imm32(_) - | SrcRef::Reg(_) => None, + | SrcRef::Reg(_) => &[], SrcRef::CBuf(cb) => match &cb.buf { - CBuf::Binding(_) | CBuf::BindlessGPR(_) => None, - CBuf::BindlessSSA(ssa) => Some(ssa), + CBuf::Binding(_) | 
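
[Aside: a self-contained sketch, using toy types rather than nak_ir.rs itself, of the component-count encoding SSARef::new() and SSARef::comps() use above: when fewer than four components are present, the unused last slot stores the wrapping-negated length, and the two reserved index values noted in SSAValue::new() are what keep ordinary SSA values from looking like a length sentinel.]

#[derive(Clone, Copy)]
struct ToySsa { packed: u32 }

impl ToySsa {
    const NONE: ToySsa = ToySsa { packed: 0 };
}

struct ToyRef { v: [ToySsa; 4] }

impl ToyRef {
    fn new(comps: &[ToySsa]) -> ToyRef {
        assert!(!comps.is_empty() && comps.len() <= 4);
        let mut r = ToyRef { v: [ToySsa::NONE; 4] };
        r.v[..comps.len()].copy_from_slice(comps);
        if comps.len() < 4 {
            // len 1, 2, 3 become u32::MAX, u32::MAX - 1, u32::MAX - 2.
            r.v[3].packed = (comps.len() as u32).wrapping_neg();
        }
        r
    }

    fn comps(&self) -> u8 {
        if self.v[3].packed >= u32::MAX - 2 {
            self.v[3].packed.wrapping_neg() as u8
        } else {
            4
        }
    }
}

fn main() {
    let a = ToySsa { packed: 1 };
    let b = ToySsa { packed: 2 };
    assert_eq!(ToyRef::new(&[a]).comps(), 1);
    assert_eq!(ToyRef::new(&[a, b]).comps(), 2);
    assert_eq!(ToyRef::new(&[a, b, a, b]).comps(), 4);
}
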
CBuf::BindlessGPR(_) => &[], + CBuf::BindlessSSA(ssa) => slice::from_ref(ssa), }, - SrcRef::SSA(ssa) => Some(ssa), + SrcRef::SSA(ssa) => ssa, } + .iter() } } @@ -401,9 +477,9 @@ impl From for SrcRef { } } -impl From for SrcRef { - fn from(ssa: SSAValue) -> SrcRef { - SrcRef::SSA(ssa) +impl> From for SrcRef { + fn from(ssa: T) -> SrcRef { + SrcRef::SSA(ssa.into()) } } @@ -484,7 +560,7 @@ impl SrcMod { } } - pub fn abs(&self) -> SrcMod { + pub fn abs(self) -> SrcMod { match self { SrcMod::None | SrcMod::Abs | SrcMod::Neg | SrcMod::NegAbs => { SrcMod::Abs @@ -493,7 +569,7 @@ impl SrcMod { } } - pub fn neg(&self) -> SrcMod { + pub fn neg(self) -> SrcMod { match self { SrcMod::None => SrcMod::Neg, SrcMod::Abs => SrcMod::NegAbs, @@ -503,13 +579,23 @@ impl SrcMod { } } - pub fn not(&self) -> SrcMod { + pub fn not(self) -> SrcMod { match self { SrcMod::None => SrcMod::Not, SrcMod::Not => SrcMod::None, _ => panic!("Not a boolean source modifier"), } } + + pub fn modify(self, other: SrcMod) -> SrcMod { + match other { + SrcMod::None => self, + SrcMod::Abs => self.abs(), + SrcMod::Neg => self.neg(), + SrcMod::NegAbs => self.abs().neg(), + SrcMod::Not => self.not(), + } + } } #[derive(Clone, Copy)] @@ -560,7 +646,7 @@ impl Src { } } - pub fn as_ssa(&self) -> Option<&SSAValue> { + pub fn as_ssa(&self) -> Option<&SSARef> { if self.src_mod.is_none() { self.src_ref.as_ssa() } else { @@ -572,8 +658,8 @@ impl Src { self.src_ref.get_reg() } - pub fn get_ssa(&self) -> Option<&SSAValue> { - self.src_ref.get_ssa() + pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> { + self.src_ref.iter_ssa() } pub fn is_uniform(&self) -> bool { @@ -615,27 +701,15 @@ impl Src { } } -impl From for Src { - fn from(src_ref: SrcRef) -> Src { +impl> From for Src { + fn from(value: T) -> Src { Src { - src_ref: src_ref, + src_ref: value.into(), src_mod: SrcMod::None, } } } -impl From for Src { - fn from(reg: RegRef) -> Src { - SrcRef::from(reg).into() - } -} - -impl From for Src { - fn from(ssa: SSAValue) -> Src { - SrcRef::from(ssa).into() - } -} - impl fmt::Display for Src { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.src_mod { @@ -1928,6 +2002,24 @@ impl fmt::Display for OpFMov { } } +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpDMov { + pub dst: Dst, + pub src: Src, + pub saturate: bool, +} + +impl fmt::Display for OpDMov { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "DMOV")?; + if self.saturate { + write!(f, ".SAT")?; + } + write!(f, " {} {}", self.dst, self.src) + } +} + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpIMov { @@ -1941,60 +2033,6 @@ impl fmt::Display for OpIMov { } } -#[repr(C)] -#[derive(DstsAsSlice)] -pub struct OpVec { - pub dst: Dst, - pub srcs: Vec, -} - -impl SrcsAsSlice for OpVec { - fn srcs_as_slice(&self) -> &[Src] { - &self.srcs - } - - fn srcs_as_mut_slice(&mut self) -> &mut [Src] { - &mut self.srcs - } -} - -impl fmt::Display for OpVec { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "VEC {} {{ {}", self.dst, self.srcs[0])?; - for src in &self.srcs[1..] 
{ - write!(f, " {}", src)?; - } - write!(f, " }}") - } -} - -#[repr(C)] -#[derive(SrcsAsSlice)] -pub struct OpSplit { - pub dsts: Vec, - pub src: Src, -} - -impl DstsAsSlice for OpSplit { - fn dsts_as_slice(&self) -> &[Dst] { - &self.dsts - } - - fn dsts_as_mut_slice(&mut self) -> &mut [Dst] { - &mut self.dsts - } -} - -impl fmt::Display for OpSplit { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "SPLIT {{ {}", self.dsts[0])?; - for dst in &self.dsts[1..] { - write!(f, " {}", dst)?; - } - write!(f, " }} {}", self.src) - } -} - #[repr(C)] #[derive(DstsAsSlice)] pub struct OpPhiSrcs { @@ -2019,6 +2057,12 @@ impl OpPhiSrcs { assert!(self.ids.len() == self.srcs.len()); self.ids.iter().zip(self.srcs.iter()) } + + pub fn push(&mut self, id: u32, src: Src) { + assert!(self.ids.len() == self.srcs.len()); + self.ids.push(id); + self.srcs.push(src); + } } impl SrcsAsSlice for OpPhiSrcs { @@ -2053,6 +2097,13 @@ pub struct OpPhiDsts { } impl OpPhiDsts { + pub fn new() -> OpPhiDsts { + OpPhiDsts { + ids: Vec::new(), + dsts: Vec::new(), + } + } + pub fn is_empty(&self) -> bool { assert!(self.ids.len() == self.dsts.len()); self.ids.is_empty() @@ -2062,6 +2113,12 @@ impl OpPhiDsts { assert!(self.ids.len() == self.dsts.len()); self.ids.iter().zip(self.dsts.iter()) } + + pub fn push(&mut self, id: u32, dst: Dst) { + assert!(self.ids.len() == self.dsts.len()); + self.ids.push(id); + self.dsts.push(dst); + } } impl DstsAsSlice for OpPhiDsts { @@ -2128,6 +2185,12 @@ impl OpParCopy { assert!(self.srcs.len() == self.dsts.len()); self.srcs.iter().zip(&self.dsts) } + + pub fn push(&mut self, src: Src, dst: Dst) { + assert!(self.srcs.len() == self.dsts.len()); + self.srcs.push(src); + self.dsts.push(dst); + } } impl SrcsAsSlice for OpParCopy { @@ -2226,11 +2289,10 @@ pub enum Op { Exit(OpExit), S2R(OpS2R), FMov(OpFMov), + DMov(OpDMov), IMov(OpIMov), PhiSrcs(OpPhiSrcs), PhiDsts(OpPhiDsts), - Vec(OpVec), - Split(OpSplit), Swap(OpSwap), ParCopy(OpParCopy), FSOut(OpFSOut), @@ -2281,13 +2343,7 @@ impl fmt::Display for Pred { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Pred::None => (), - Pred::SSA(v) => { - if v.is_uniform() { - write!(f, "USSA{}@{}", v.idx(), v.comps())?; - } else { - write!(f, "SSA{}@{}", v.idx(), v.comps())?; - } - } + Pred::SSA(s) => s.fmt(f)?, Pred::Reg(r) => r.fmt(f)?, } Ok(()) @@ -2598,20 +2654,6 @@ impl Instr { Instr::new(Op::S2R(OpS2R { dst: dst, idx: idx })) } - pub fn new_vec(dst: Dst, srcs: &[Src]) -> Instr { - Instr::new(Op::Vec(OpVec { - dst: dst, - srcs: srcs.to_vec(), - })) - } - - pub fn new_split(dsts: &[Dst], src: Src) -> Instr { - Instr::new(Op::Split(OpSplit { - dsts: dsts.to_vec(), - src: src, - })) - } - pub fn new_swap(x: RegRef, y: RegRef) -> Instr { assert!(x.file() == y.file()); Instr::new(Op::Swap(OpSwap { @@ -2694,11 +2736,10 @@ impl Instr { Op::St(_) => None, Op::Bra(_) | Op::Exit(_) => Some(15), Op::FMov(_) + | Op::DMov(_) | Op::IMov(_) | Op::PhiSrcs(_) | Op::PhiDsts(_) - | Op::Vec(_) - | Op::Split(_) | Op::Swap(_) | Op::ParCopy(_) | Op::FSOut(_) => { @@ -2796,10 +2837,10 @@ pub struct Function { } impl Function { - pub fn new(id: u32, reserved_ssa_count: u32) -> Function { + pub fn new(id: u32) -> Function { Function { id: id, - ssa_alloc: SSAValueAllocator::new(reserved_ssa_count), + ssa_alloc: SSAValueAllocator::new(), blocks: Vec::new(), } } @@ -2864,46 +2905,6 @@ impl Shader { carry: Src::new_imm_bool(false), }))] } - Op::Vec(vec) => { - let comps = u8::try_from(vec.srcs.len()).unwrap(); - let vec_dst = 
vec.dst.as_reg().unwrap(); - assert!(comps == vec_dst.comps()); - - let mut dsts = Vec::new(); - for i in 0..comps { - dsts.push(Dst::from(vec_dst.as_comp(i).unwrap())); - } - vec![Instr::new(Op::ParCopy(OpParCopy { - srcs: vec.srcs, - dsts: dsts, - }))] - } - Op::Split(split) => { - let vec_src = split.src.src_ref.as_reg().unwrap(); - assert!(usize::from(vec_src.comps()) == split.dsts.len()); - - let mut dsts = Vec::new(); - let mut srcs = Vec::new(); - for (i, dst) in split.dsts.iter().enumerate() { - let i = u8::try_from(i).unwrap(); - let src = vec_src.as_comp(i).unwrap(); - match dst { - Dst::None => continue, - Dst::Reg(reg) => { - if *reg == src { - continue; - } - } - _ => (), - } - dsts.push(*dst); - srcs.push(src.into()); - } - vec![Instr::new(Op::ParCopy(OpParCopy { - srcs: srcs, - dsts: dsts, - }))] - } Op::FSOut(out) => { let mut pcopy = OpParCopy::new(); for (i, src) in out.srcs.iter().enumerate() { diff --git a/src/nouveau/compiler/nak_legalize.rs b/src/nouveau/compiler/nak_legalize.rs index ef401502383..82966db8469 100644 --- a/src/nouveau/compiler/nak_legalize.rs +++ b/src/nouveau/compiler/nak_legalize.rs @@ -52,7 +52,7 @@ impl<'a> LegalizeInstr<'a> { } pub fn mov_src(&mut self, src: &mut Src, file: RegFile) { - let val = self.ssa_alloc.alloc(file, 1); + let val = self.ssa_alloc.alloc(file); self.instrs .push(Instr::new_mov(val.into(), src.src_ref.into())); src.src_ref = val.into(); diff --git a/src/nouveau/compiler/nak_liveness.rs b/src/nouveau/compiler/nak_liveness.rs index f5d6fae54c6..0224fde674e 100644 --- a/src/nouveau/compiler/nak_liveness.rs +++ b/src/nouveau/compiler/nak_liveness.rs @@ -47,14 +47,16 @@ impl BlockLiveness { } for src in instr.srcs() { - if let Some(val) = src.get_ssa() { - self.add_use(val, ip); + for sv in src.iter_ssa() { + self.add_use(sv, ip); } } for dst in instr.dsts() { - if let Dst::SSA(val) = dst { - self.add_def(val); + if let Dst::SSA(sr) = dst { + for sv in sr.iter() { + self.add_def(sv); + } } } } diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 42da7a42b32..2a49ee27240 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -538,6 +538,8 @@ nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak) }; OPT(nir, nir_lower_mem_access_bit_sizes, &mem_bit_size_options); + nak_optimize_nir(nir, nak); + OPT(nir, nak_nir_lower_tex, nak); OPT(nir, nir_lower_idiv, NULL); OPT(nir, nir_lower_int64); diff --git a/src/nouveau/compiler/nak_opt_copy_prop.rs b/src/nouveau/compiler/nak_opt_copy_prop.rs index 7e3e90046f2..dd9b5363c68 100644 --- a/src/nouveau/compiler/nak_opt_copy_prop.rs +++ b/src/nouveau/compiler/nak_opt_copy_prop.rs @@ -13,6 +13,7 @@ enum CopyType { Raw, Bits, F32, + F64H, I32, } @@ -32,7 +33,7 @@ impl CopyEntry { } struct CopyPropPass { - ssa_map: HashMap>, + ssa_map: HashMap, } impl CopyPropPass { @@ -42,55 +43,46 @@ impl CopyPropPass { } } - fn add_copy(&mut self, dst: &SSAValue, typ: CopyType, src_vec: &[Src]) { - let entries = src_vec - .iter() - .map(|src| { - match typ { - CopyType::Raw => assert!(src.src_mod.is_none()), - CopyType::Bits => assert!(src.src_mod.is_bitwise()), - CopyType::F32 | CopyType::I32 => { - assert!(src.src_mod.is_alu()) - } - } - CopyEntry { - typ: match src.src_mod { - SrcMod::None => CopyType::Raw, - SrcMod::Abs | SrcMod::Neg | SrcMod::NegAbs => { - assert!( - typ != CopyType::Raw && typ != CopyType::Bits - ); - typ - } - SrcMod::Not => { - assert!(typ == CopyType::Bits); - typ - } - }, - src: *src, - } - }) - .collect(); - 
self.ssa_map.insert(*dst, entries); + fn add_copy(&mut self, dst: SSAValue, typ: CopyType, src: Src) { + match typ { + CopyType::Raw => assert!(src.src_mod.is_none()), + CopyType::Bits => assert!(src.src_mod.is_bitwise()), + CopyType::F32 | CopyType::F64H | CopyType::I32 => { + assert!(src.src_mod.is_alu()) + } + } + let typ = match src.src_mod { + SrcMod::None => CopyType::Raw, + SrcMod::Abs | SrcMod::Neg | SrcMod::NegAbs => { + assert!(typ != CopyType::Raw && typ != CopyType::Bits); + typ + } + SrcMod::Not => { + assert!(typ == CopyType::Bits); + typ + } + }; + if let Some(ssa) = src.src_ref.as_ssa() { + assert!(ssa.comps() == 1); + } + + self.ssa_map.insert(dst, CopyEntry { typ: typ, src: src }); } - fn add_copy_entry(&mut self, dst: &SSAValue, entry: CopyEntry) { - self.ssa_map.insert(*dst, vec![entry]); - } - - fn get_copy(&mut self, dst: &SSAValue) -> Option<&Vec> { + fn get_copy(&mut self, dst: &SSAValue) -> Option<&CopyEntry> { self.ssa_map.get(dst) } fn prop_to_pred(&mut self, pred: &mut Pred, pred_inv: &mut bool) { if let Pred::SSA(src_ssa) = pred { - if let Some(src_vec) = self.get_copy(&src_ssa) { - let entry = &src_vec[0]; + if let Some(entry) = self.get_copy(&src_ssa) { if !entry.supports_type(CopyType::Bits) { return; } - *pred = Pred::SSA(*entry.src.src_ref.as_ssa().unwrap()); + let copy_ssa = entry.src.src_ref.as_ssa().unwrap(); + assert!(copy_ssa.comps() == 1 && copy_ssa.is_predicate()); + *pred = Pred::SSA(copy_ssa[0]); if entry.src.src_mod.has_not() { *pred_inv = !*pred_inv; } @@ -100,43 +92,63 @@ impl CopyPropPass { fn prop_to_src(&mut self, src: &mut Src, src_typ: CopyType) -> bool { if let SrcRef::SSA(src_ssa) = src.src_ref { - if src_ssa.comps() != 1 { - return false; /* TODO */ + let mut found_copy = false; + let mut copy_mod = src.src_mod; + let mut copy_vals = [SSAValue::NONE; 4]; + for c in 0..src_ssa.comps() { + let c_val = &src_ssa[usize::from(c)]; + if let Some(entry) = self.get_copy(c_val) { + let c_typ = match src_typ { + CopyType::Raw => CopyType::Raw, + CopyType::Bits | CopyType::F32 | CopyType::I32 => { + assert!(src_ssa.comps() == 1); + src_typ + } + CopyType::F64H => { + assert!(src_ssa.comps() == 2); + /* The low bits of a 64-bit value are read raw */ + if c == 0 { + CopyType::Raw + } else { + CopyType::F64H + } + } + }; + + if !entry.supports_type(c_typ) { + return false; + } + + if c_typ != CopyType::Raw { + assert!(c == src_ssa.comps() - 1); + copy_mod = entry.src.src_mod.modify(src.src_mod); + } + + if let Some(e_ssa) = entry.src.as_ssa() { + assert!(e_ssa.comps() == 1); + found_copy = true; + copy_vals[usize::from(c)] = e_ssa[0]; + } else if src_ssa.comps() == 1 { + src.src_mod = copy_mod; + src.src_ref = entry.src.src_ref; + return true; + } else { + return false; + } + } else { + copy_vals[usize::from(c)] = *c_val; + } } - if let Some(src_vec) = self.get_copy(&src_ssa) { - let entry = &src_vec[0]; - if !entry.supports_type(src_typ) { - return false; - } - - let mut new_src = entry.src; - match src_typ { - CopyType::Raw => { - assert!(src.src_mod.is_none()); - } - CopyType::Bits => { - if src.src_mod.has_neg() { - new_src.src_mod = new_src.src_mod.neg(); - } - } - CopyType::F32 | CopyType::I32 => { - if src.src_mod.has_abs() { - new_src.src_mod = new_src.src_mod.abs(); - } - if src.src_mod.has_neg() { - new_src.src_mod = new_src.src_mod.neg(); - } - } - } - *src = new_src; - true - } else { - false + if found_copy { + let comps = usize::from(src_ssa.comps()); + let copy_ssa = SSARef::try_from(©_vals[..comps]).unwrap(); + src.src_mod = copy_mod; 
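
[Aside: the F64H copy type used here rests on a bit-level fact: an f64's sign bit sits in bit 31 of its high 32-bit word, so Neg/Abs on a 64-bit value only rewrites the high component while the low component is propagated raw (see the "low bits ... are read raw" comment and the OpDMov handling below). A quick standalone check in plain Rust, independent of the compiler types:]

fn main() {
    let x: f64 = 1.5;
    let bits = x.to_bits();
    let (lo, hi) = (bits as u32, (bits >> 32) as u32);

    // Negation flips only bit 31 of the high dword; the low dword is untouched.
    let neg = u64::from(lo) | (u64::from(hi ^ 0x8000_0000) << 32);
    assert_eq!(f64::from_bits(neg), -x);

    // Absolute value clears that same bit, again leaving the low dword alone.
    let abs = u64::from(lo) | (u64::from(hi & 0x7fff_ffff) << 32);
    assert_eq!(f64::from_bits(abs), x.abs());
}
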
+ src.src_ref = copy_ssa.into(); + return true; } - } else { - false } + false } fn prop_to_srcs(&mut self, srcs: &mut [Src], src_typ: CopyType) -> bool { @@ -151,40 +163,51 @@ impl CopyPropPass { for b in &mut f.blocks { for instr in &mut b.instrs { match &instr.op { + Op::Mov(mov) => { + let dst = mov.dst.as_ssa().unwrap(); + assert!(dst.comps() == 1); + if mov.quad_lanes == 0xf { + self.add_copy(dst[0], CopyType::Raw, mov.src); + } + } Op::FMov(mov) => { + let dst = mov.dst.as_ssa().unwrap(); + assert!(dst.comps() == 1); if !mov.saturate { - self.add_copy( - mov.dst.as_ssa().unwrap(), - CopyType::F32, - slice::from_ref(&mov.src), - ); + self.add_copy(dst[0], CopyType::F32, mov.src); + } + } + Op::DMov(mov) => { + let dst = mov.dst.as_ssa().unwrap(); + assert!(dst.comps() == 2); + if !mov.saturate { + if let Some(src) = mov.src.src_ref.as_ssa() { + self.add_copy( + dst[0], + CopyType::Bits, + src[0].into(), + ); + self.add_copy( + dst[1], + CopyType::F64H, + Src { + src_ref: src[1].into(), + src_mod: mov.src.src_mod, + }, + ); + } } } Op::IMov(mov) => { - self.add_copy( - mov.dst.as_ssa().unwrap(), - CopyType::I32, - slice::from_ref(&mov.src), - ); + let dst = mov.dst.as_ssa().unwrap(); + assert!(dst.comps() == 1); + self.add_copy(dst[0], CopyType::I32, mov.src); } - Op::Vec(vec) => { - self.add_copy( - vec.dst.as_ssa().unwrap(), - CopyType::Raw, - &vec.srcs, - ); - } - Op::Split(split) => { - assert!(split.src.src_mod.is_none()); - let src_ssa = split.src.src_ref.as_ssa().unwrap(); - if let Some(src_vec) = self.get_copy(src_ssa) { - let mut src_vec = src_vec.clone(); - assert!(src_vec.len() == split.dsts.len()); - for (i, entry) in src_vec.drain(..).enumerate() { - if let Dst::SSA(ssa) = &split.dsts[i] { - self.add_copy_entry(ssa, entry); - } - } + Op::ParCopy(pcopy) => { + for (src, dst) in pcopy.iter() { + let dst = dst.as_ssa().unwrap(); + assert!(dst.comps() == 1); + self.add_copy(dst[0], CopyType::Raw, *src); } } _ => (), diff --git a/src/nouveau/compiler/nak_opt_dce.rs b/src/nouveau/compiler/nak_opt_dce.rs index a8b2307bd78..9b9474edfb2 100644 --- a/src/nouveau/compiler/nak_opt_dce.rs +++ b/src/nouveau/compiler/nak_opt_dce.rs @@ -30,7 +30,9 @@ impl DeadCodePass { fn mark_src_live(&mut self, src: &Src) { if let SrcRef::SSA(ssa) = &src.src_ref { - self.mark_ssa_live(ssa); + for val in ssa.iter() { + self.mark_ssa_live(val); + } } } @@ -40,7 +42,14 @@ impl DeadCodePass { fn is_dst_live(&self, dst: &Dst) -> bool { match dst { - Dst::SSA(ssa) => self.live_ssa.get(ssa).is_some(), + Dst::SSA(ssa) => { + for val in ssa.iter() { + if self.live_ssa.get(val).is_some() { + return true; + } + } + false + } Dst::None => false, _ => panic!("Invalid SSA destination"), }
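
[Aside: the 64-bit nir_op_iadd lowering in nak_from_nir.rs above emits two IAdd3 ops, the first producing the low word plus an overflow predicate that the second consumes as carry. A small standalone check of the arithmetic being modelled, using plain u32/u64 rather than the compiler's types:]

fn add64_via_32(x: u64, y: u64) -> u64 {
    let (x_lo, x_hi) = (x as u32, (x >> 32) as u32);
    let (y_lo, y_hi) = (y as u32, (y >> 32) as u32);
    // Low words add first and report whether they overflowed.
    let (sum_lo, carry) = x_lo.overflowing_add(y_lo);
    // High words add with that carry folded in.
    let sum_hi = x_hi.wrapping_add(y_hi).wrapping_add(carry as u32);
    u64::from(sum_lo) | (u64::from(sum_hi) << 32)
}

fn main() {
    for &(x, y) in &[(0x1_0000_0000u64, 0xffff_ffffu64), (u64::MAX, 1), (123, 456)] {
        assert_eq!(add64_via_32(x, y), x.wrapping_add(y));
    }
}
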