diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs index 1ddf6de5fa8..0ac6983d52b 100644 --- a/src/nouveau/compiler/nak.rs +++ b/src/nouveau/compiler/nak.rs @@ -10,6 +10,7 @@ mod nak_encode_sm75; mod nak_from_nir; mod nak_ir; mod nak_legalize; +mod nak_lower_par_copies; mod nak_opt_copy_prop; mod nak_opt_dce; mod nir; @@ -331,6 +332,8 @@ pub extern "C" fn nak_compile_shader( s.assign_regs_trivial(); s.lower_vec_split(); + s.lower_par_copies(); + s.lower_swap(); s.lower_mov_predicate(); s.calc_instr_deps(); diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 2fac36c08eb..a86b6062c05 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -1405,6 +1405,63 @@ impl fmt::Display for OpPhiDst { } } +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpSwap { + pub dsts: [Dst; 2], + pub srcs: [Src; 2], +} + +impl fmt::Display for OpSwap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "SWAP {{ {} {} }} {{ {} {} }}", + self.dsts[0], self.dsts[1], self.srcs[0], self.srcs[1] + ) + } +} + +#[repr(C)] +pub struct OpParCopy { + pub dsts: Vec, + pub srcs: Vec, +} + +impl SrcsAsSlice for OpParCopy { + fn srcs_as_slice(&self) -> &[Src] { + &self.srcs + } + + fn srcs_as_mut_slice(&mut self) -> &mut [Src] { + &mut self.srcs + } +} + +impl DstsAsSlice for OpParCopy { + fn dsts_as_slice(&self) -> &[Dst] { + &self.dsts + } + + fn dsts_as_mut_slice(&mut self) -> &mut [Dst] { + &mut self.dsts + } +} + +impl fmt::Display for OpParCopy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "PAR_COPY {{")?; + assert!(self.srcs.len() == self.dsts.len()); + for i in 0..self.srcs.len() { + if i > 0 { + write!(f, ",")?; + } + write!(f, " {} <- {}", self.dsts[i], self.srcs[i])?; + } + write!(f, " }}") + } +} + #[repr(C)] #[derive(DstsAsSlice)] pub struct OpFSOut { @@ -1457,6 +1514,8 @@ pub enum Op { PhiDst(OpPhiDst), Vec(OpVec), Split(OpSplit), + Swap(OpSwap), + ParCopy(OpParCopy), FSOut(OpFSOut), } @@ -1695,6 +1754,11 @@ impl Instr { })) } + pub fn new_xor(dst: Dst, x: Src, y: Src) -> Instr { + let xor_lop = LogicOp::new_lut(&|x, y, _| x ^ y); + Instr::new_lop3(dst, xor_lop, x, y, Src::new_zero()) + } + pub fn new_shl(dst: Dst, x: Src, shift: Src) -> Instr { Instr::new(Op::Shl(OpShl { dst: dst, @@ -1815,6 +1879,14 @@ impl Instr { })) } + pub fn new_swap(x: RegRef, y: RegRef) -> Instr { + assert!(x.file() == y.file()); + Instr::new(Op::Swap(OpSwap { + dsts: [x.into(), y.into()], + srcs: [y.into(), x.into()], + })) + } + pub fn new_fs_out(srcs: &[Src]) -> Instr { Instr::new(Op::FSOut(OpFSOut { srcs: srcs.to_vec(), @@ -1879,6 +1951,8 @@ impl Instr { | Op::PhiDst(_) | Op::Vec(_) | Op::Split(_) + | Op::Swap(_) + | Op::ParCopy(_) | Op::FSOut(_) => { panic!("Not a hardware opcode") } @@ -2077,6 +2151,44 @@ impl Shader { }) } + pub fn lower_swap(&mut self) { + self.map_instrs(&|instr: Instr, _| -> Vec { + match instr.op { + Op::Swap(swap) => { + let x = *swap.dsts[0].as_reg().unwrap(); + let y = *swap.dsts[1].as_reg().unwrap(); + + assert!(x.file() == y.file()); + assert!(x.comps() == 1 && y.comps() == 1); + assert!(swap.srcs[0].src_mod.is_none()); + assert!(*swap.srcs[0].src_ref.as_reg().unwrap() == y); + assert!(swap.srcs[1].src_mod.is_none()); + assert!(*swap.srcs[1].src_ref.as_reg().unwrap() == x); + + if x == y { + Vec::new() + } else if x.is_predicate() { + vec![Instr::new(Op::PLop3(OpPLop3 { + dsts: [x.into(), y.into()], + srcs: [x.into(), y.into(), Src::new_imm_bool(true)], + ops: [ + LogicOp::new_lut(&|_, y, _| y), + LogicOp::new_lut(&|x, _, _| x), + ], + }))] + } else { + vec![ + Instr::new_xor(x.into(), x.into(), y.into()), + Instr::new_xor(y.into(), x.into(), y.into()), + Instr::new_xor(x.into(), x.into(), y.into()), + ] + } + } + _ => vec![instr], + } + }) + } + pub fn lower_mov_predicate(&mut self) { self.map_instrs(&|instr: Instr, _| -> Vec { match &instr.op { diff --git a/src/nouveau/compiler/nak_lower_par_copies.rs b/src/nouveau/compiler/nak_lower_par_copies.rs new file mode 100644 index 00000000000..5aa2ec0dd5f --- /dev/null +++ b/src/nouveau/compiler/nak_lower_par_copies.rs @@ -0,0 +1,198 @@ +/* + * Copyright © 2022 Collabora, Ltd. + * SPDX-License-Identifier: MIT + */ + +use crate::nak_ir::*; + +use std::collections::HashMap; + +struct CopyNode { + num_reads: usize, + src: isize, +} + +struct CopyGraph { + nodes: Vec, +} + +impl CopyGraph { + pub fn new() -> CopyGraph { + CopyGraph { nodes: Vec::new() } + } + + fn add_node(&mut self) -> usize { + let node_idx = self.nodes.len(); + self.nodes.push(CopyNode { + num_reads: 0, + src: -1, + }); + node_idx + } + + fn num_reads(&self, node_idx: usize) -> usize { + self.nodes[node_idx].num_reads + } + + fn src(&self, node_idx: usize) -> Option { + if self.nodes[node_idx].src < 0 { + None + } else { + Some(self.nodes[node_idx].src.try_into().unwrap()) + } + } + + fn add_edge(&mut self, dst_idx: usize, src_idx: usize) { + /* Disallow self-loops */ + assert!(dst_idx != src_idx); + + /* Each node has in-degree at most 1 */ + assert!(self.nodes[dst_idx].src == -1); + self.nodes[dst_idx].src = src_idx.try_into().unwrap(); + self.nodes[src_idx].num_reads += 1; + } + + fn del_edge(&mut self, dst_idx: usize, src_idx: usize) -> bool { + assert!(self.nodes[dst_idx].src >= 0); + self.nodes[dst_idx].src = -1; + self.nodes[src_idx].num_reads -= 1; + self.nodes[src_idx].num_reads == 0 + } +} + +fn lower_par_copy(pc: OpParCopy) -> Vec { + let mut graph = CopyGraph::new(); + let mut vals = Vec::new(); + let mut reg_to_idx = HashMap::new(); + + for (i, dst) in pc.dsts.iter().enumerate() { + /* Destinations must be pairwise unique */ + let reg = dst.as_reg().unwrap(); + assert!(reg_to_idx.get(reg).is_none()); + + /* Everything must be scalar */ + assert!(reg.comps() == 1); + + let node_idx = graph.add_node(); + assert!(node_idx == i && vals.len() == i); + vals.push(SrcRef::from(*reg)); + reg_to_idx.insert(*reg, i); + } + + for (dst_idx, src) in pc.srcs.iter().enumerate() { + assert!(src.src_mod.is_none()); + let src = src.src_ref; + + let src_idx = if let SrcRef::Reg(reg) = src { + /* Everything must be scalar */ + assert!(reg.comps() == 1); + + *reg_to_idx.entry(reg).or_insert_with(|| { + let node_idx = graph.add_node(); + assert!(node_idx == vals.len()); + vals.push(src); + node_idx + }) + } else { + /* We can't have bindless CBufs because we can't resolve cycles + * containing one. + */ + assert!(src.get_reg().is_none()); + + let node_idx = graph.add_node(); + assert!(node_idx == vals.len()); + vals.push(src); + node_idx + }; + + if dst_idx != src_idx { + graph.add_edge(dst_idx, src_idx); + } + } + + let mut instrs = Vec::new(); + + let mut ready = Vec::new(); + for i in 0..pc.dsts.len() { + if graph.num_reads(i) == 0 { + ready.push(i); + } + } + while !ready.is_empty() { + let dst_idx = ready.pop().unwrap(); + if let Some(src_idx) = graph.src(dst_idx) { + let dst = *vals[dst_idx].as_reg().unwrap(); + let src = vals[src_idx]; + instrs.push(Instr::new_mov(dst.into(), src.into())); + if graph.del_edge(dst_idx, src_idx) { + ready.push(src_idx); + } + } + } + + /* At this point, all we are left with in the graph are isolated nodes + * (no edges) and cycles. + * + * Proof: + * + * Without loss of generality, we can assume that there are no isolated + * nodes in the graph. By construction, no node has in-degree more than 1 + * (that would indicate a duplicate destination). The loop above ensures + * that there are no nodes with an in-degree of 1 and an out-degree of 0. + * + * Suppose that there were a node with an in-degree of 0. Then, because no + * node has an in-degree greater than 1, the number of edges must be less + * than the number of nodes. This implies that there is some node N with + * with out-degree of 0. If N has an in-degree of 0 then it is isolated, + * which is a contradiction. If N has an in-degree of 1 then it is a node + * with in-degree of 1 and out-degree of 0 which is also a contradiction. + * Therefore, there are no nodes with in-degree of 0 and all nodes have an + * in-degree of 1. + * + * Since all nodes have an in-degree of 1, no node has an out-degree of 0 + * and, because the sum of all in-degrees equals the sum of all out-degrees + * (they both equal the number of edges), every node must also have an + * out-degree of 1. Therefore, the graph only contains cycles. + * + * QED + */ + for i in 0..pc.dsts.len() { + loop { + if let Some(j) = graph.src(i) { + /* We're part of a cycle so j also has a source */ + let k = graph.src(j).unwrap(); + + instrs.push(Instr::new_swap( + *vals[j].as_reg().unwrap(), + *vals[k].as_reg().unwrap(), + )); + + graph.del_edge(i, j); + graph.del_edge(j, k); + if i != k { + graph.add_edge(i, k); + } + } else { + /* This is an isolated node */ + assert!(graph.src(i).is_none() && graph.num_reads(i) == 0); + break; + } + } + } + + instrs +} + +impl Shader { + pub fn lower_par_copies(&mut self) { + self.map_instrs(&|instr, _| -> Vec { + match instr.op { + Op::ParCopy(pc) => { + assert!(instr.pred.is_none()); + lower_par_copy(pc) + } + _ => vec![instr], + } + }); + } +}