nak: Add a parallel copy instruction with lowering

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand 2023-04-10 17:23:25 -05:00 committed by Marge Bot
parent 90c098e286
commit df11e232d6
3 changed files with 313 additions and 0 deletions

View file

@ -10,6 +10,7 @@ mod nak_encode_sm75;
mod nak_from_nir;
mod nak_ir;
mod nak_legalize;
mod nak_lower_par_copies;
mod nak_opt_copy_prop;
mod nak_opt_dce;
mod nir;
@ -331,6 +332,8 @@ pub extern "C" fn nak_compile_shader(
s.assign_regs_trivial();
s.lower_vec_split();
s.lower_par_copies();
s.lower_swap();
s.lower_mov_predicate();
s.calc_instr_deps();

View file

@ -1405,6 +1405,63 @@ impl fmt::Display for OpPhiDst {
}
}
/// Virtual op that exchanges the contents of two registers.
///
/// This is not a hardware opcode; `Shader::lower_swap` replaces it with real
/// instructions.  `Instr::new_swap` builds it so that `srcs` holds the same
/// two registers as `dsts` in the opposite order, and `lower_swap` asserts
/// that shape.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSwap {
/// The two registers being written.
pub dsts: [Dst; 2],
/// The two registers being read: `srcs[0]` is `dsts[1]` and `srcs[1]` is
/// `dsts[0]`.
pub srcs: [Src; 2],
}
impl fmt::Display for OpSwap {
    /// Prints the swap as `SWAP { dst0 dst1 } { src0 src1 }`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let [dst0, dst1] = &self.dsts;
        let [src0, src1] = &self.srcs;
        write!(f, "SWAP {{ {} {} }} {{ {} {} }}", dst0, dst1, src0, src1)
    }
}
/// Virtual op describing a parallel copy: a set of copies
/// `dsts[i] <- srcs[i]`.
///
/// This is not a hardware opcode; `Shader::lower_par_copies` replaces it with
/// moves and swaps.  `dsts` and `srcs` must have the same length (the
/// `Display` impl asserts this).
#[repr(C)]
pub struct OpParCopy {
/// Copy destinations; `dsts[i]` receives `srcs[i]`.
pub dsts: Vec<Dst>,
/// Copy sources, index-matched with `dsts`.
pub srcs: Vec<Src>,
}
/// Exposes the variable-length source list of a parallel copy as a slice.
impl SrcsAsSlice for OpParCopy {
    /// Shared view of every copy source, in declaration order.
    fn srcs_as_slice(&self) -> &[Src] {
        self.srcs.as_slice()
    }

    /// Mutable view of every copy source, in declaration order.
    fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
        self.srcs.as_mut_slice()
    }
}
/// Exposes the variable-length destination list of a parallel copy as a slice.
impl DstsAsSlice for OpParCopy {
    /// Shared view of every copy destination, in declaration order.
    fn dsts_as_slice(&self) -> &[Dst] {
        self.dsts.as_slice()
    }

    /// Mutable view of every copy destination, in declaration order.
    fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
        self.dsts.as_mut_slice()
    }
}
impl fmt::Display for OpParCopy {
    /// Prints the copy as `PAR_COPY { d0 <- s0, d1 <- s1, ... }`.
    ///
    /// Panics if `srcs` and `dsts` differ in length.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "PAR_COPY {{")?;
        assert!(self.srcs.len() == self.dsts.len());

        let pairs = self.dsts.iter().zip(self.srcs.iter());
        for (i, (dst, src)) in pairs.enumerate() {
            /* Entries after the first are comma-separated */
            if i != 0 {
                write!(f, ",")?;
            }
            write!(f, " {} <- {}", dst, src)?;
        }

        write!(f, " }}")
    }
}
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpFSOut {
@ -1457,6 +1514,8 @@ pub enum Op {
PhiDst(OpPhiDst),
Vec(OpVec),
Split(OpSplit),
Swap(OpSwap),
ParCopy(OpParCopy),
FSOut(OpFSOut),
}
@ -1695,6 +1754,11 @@ impl Instr {
}))
}
/// Builds a `lop3` instruction computing `dst = x ^ y`; the unused third
/// operand is the zero source.
pub fn new_xor(dst: Dst, x: Src, y: Src) -> Instr {
    Instr::new_lop3(
        dst,
        LogicOp::new_lut(&|a, b, _| a ^ b),
        x,
        y,
        Src::new_zero(),
    )
}
pub fn new_shl(dst: Dst, x: Src, shift: Src) -> Instr {
Instr::new(Op::Shl(OpShl {
dst: dst,
@ -1815,6 +1879,14 @@ impl Instr {
}))
}
/// Builds an `OpSwap` exchanging registers `x` and `y`.
///
/// Panics if the two registers are not in the same register file.
pub fn new_swap(x: RegRef, y: RegRef) -> Instr {
    assert!(x.file() == y.file());

    /* Each destination reads the *other* register */
    let dsts = [x.into(), y.into()];
    let srcs = [y.into(), x.into()];
    Instr::new(Op::Swap(OpSwap { dsts, srcs }))
}
pub fn new_fs_out(srcs: &[Src]) -> Instr {
Instr::new(Op::FSOut(OpFSOut {
srcs: srcs.to_vec(),
@ -1879,6 +1951,8 @@ impl Instr {
| Op::PhiDst(_)
| Op::Vec(_)
| Op::Split(_)
| Op::Swap(_)
| Op::ParCopy(_)
| Op::FSOut(_) => {
panic!("Not a hardware opcode")
}
@ -2077,6 +2151,44 @@ impl Shader {
})
}
/// Replaces every `OpSwap` in the shader with real instructions.
///
/// * Swapping a register with itself produces no instructions.
/// * Predicate registers are exchanged by a single `PLop3` that writes both
///   destinations at once.
/// * Any other register is exchanged with the three-XOR trick.
///
/// All other instructions are passed through untouched.  Panics if a swap is
/// malformed: non-register or non-scalar operands, mismatched register
/// files, source modifiers, or sources that are not the destinations in
/// reversed order.
pub fn lower_swap(&mut self) {
    self.map_instrs(&|instr: Instr, _| -> Vec<Instr> {
        let swap = match instr.op {
            Op::Swap(swap) => swap,
            _ => return vec![instr],
        };

        let [dst_x, dst_y] = &swap.dsts;
        let [src_for_x, src_for_y] = &swap.srcs;

        let x = *dst_x.as_reg().unwrap();
        let y = *dst_y.as_reg().unwrap();

        assert!(x.file() == y.file());
        assert!(x.comps() == 1 && y.comps() == 1);
        /* x is written from y and y is written from x */
        assert!(src_for_x.src_mod.is_none());
        assert!(*src_for_x.src_ref.as_reg().unwrap() == y);
        assert!(src_for_y.src_mod.is_none());
        assert!(*src_for_y.src_ref.as_reg().unwrap() == x);

        if x == y {
            /* Nothing to exchange */
            return Vec::new();
        }

        if x.is_predicate() {
            /* One instruction, two results: x takes the second source (y)
             * and y takes the first source (x).
             */
            return vec![Instr::new(Op::PLop3(OpPLop3 {
                dsts: [x.into(), y.into()],
                srcs: [x.into(), y.into(), Src::new_imm_bool(true)],
                ops: [
                    LogicOp::new_lut(&|_, b, _| b),
                    LogicOp::new_lut(&|a, _, _| a),
                ],
            }))];
        }

        /* Classic XOR swap: x ^= y; y ^= x; x ^= y */
        vec![
            Instr::new_xor(x.into(), x.into(), y.into()),
            Instr::new_xor(y.into(), x.into(), y.into()),
            Instr::new_xor(x.into(), x.into(), y.into()),
        ]
    })
}
pub fn lower_mov_predicate(&mut self) {
self.map_instrs(&|instr: Instr, _| -> Vec<Instr> {
match &instr.op {

View file

@ -0,0 +1,198 @@
/*
* Copyright © 2022 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
use crate::nak_ir::*;
use std::collections::HashMap;
/// One value taking part in a parallel copy.
struct CopyNode {
    /// Number of nodes that currently copy *from* this node (out-degree).
    num_reads: usize,
    /// Node this one is copied from, or `None` if it has no incoming edge.
    src: Option<usize>,
}

/// Directed graph of pending copies.
///
/// An edge `dst -> src` means "`dst` must receive the value of `src`".  Every
/// node has at most one source, and self-loops are not allowed.
struct CopyGraph {
    nodes: Vec<CopyNode>,
}

impl CopyGraph {
    /// Creates an empty graph.
    pub fn new() -> CopyGraph {
        CopyGraph { nodes: Vec::new() }
    }

    /// Appends an isolated node and returns its index.  Indices are handed
    /// out sequentially starting at 0.
    fn add_node(&mut self) -> usize {
        let node_idx = self.nodes.len();
        self.nodes.push(CopyNode {
            num_reads: 0,
            src: None,
        });
        node_idx
    }

    /// Number of nodes that currently read from `node_idx`.
    fn num_reads(&self, node_idx: usize) -> usize {
        self.nodes[node_idx].num_reads
    }

    /// The node `node_idx` is copied from, if any.
    fn src(&self, node_idx: usize) -> Option<usize> {
        self.nodes[node_idx].src
    }

    /// Records that `dst_idx` is copied from `src_idx`.
    ///
    /// Panics on a self-loop or if `dst_idx` already has a source.
    fn add_edge(&mut self, dst_idx: usize, src_idx: usize) {
        /* Disallow self-loops */
        assert!(dst_idx != src_idx);

        /* Each node has in-degree at most 1 */
        assert!(self.nodes[dst_idx].src.is_none());

        self.nodes[dst_idx].src = Some(src_idx);
        self.nodes[src_idx].num_reads += 1;
    }

    /// Removes the edge `dst_idx -> src_idx` and returns `true` when that was
    /// the last read of `src_idx`.
    ///
    /// Panics if `src_idx` is not the current source of `dst_idx`; without
    /// this check a mismatched call would silently decrement the read count
    /// of an unrelated node.
    fn del_edge(&mut self, dst_idx: usize, src_idx: usize) -> bool {
        assert!(self.nodes[dst_idx].src == Some(src_idx));

        self.nodes[dst_idx].src = None;

        let src_node = &mut self.nodes[src_idx];
        src_node.num_reads -= 1;
        src_node.num_reads == 0
    }
}
fn lower_par_copy(pc: OpParCopy) -> Vec<Instr> {
let mut graph = CopyGraph::new();
let mut vals = Vec::new();
let mut reg_to_idx = HashMap::new();
for (i, dst) in pc.dsts.iter().enumerate() {
/* Destinations must be pairwise unique */
let reg = dst.as_reg().unwrap();
assert!(reg_to_idx.get(reg).is_none());
/* Everything must be scalar */
assert!(reg.comps() == 1);
let node_idx = graph.add_node();
assert!(node_idx == i && vals.len() == i);
vals.push(SrcRef::from(*reg));
reg_to_idx.insert(*reg, i);
}
for (dst_idx, src) in pc.srcs.iter().enumerate() {
assert!(src.src_mod.is_none());
let src = src.src_ref;
let src_idx = if let SrcRef::Reg(reg) = src {
/* Everything must be scalar */
assert!(reg.comps() == 1);
*reg_to_idx.entry(reg).or_insert_with(|| {
let node_idx = graph.add_node();
assert!(node_idx == vals.len());
vals.push(src);
node_idx
})
} else {
/* We can't have bindless CBufs because we can't resolve cycles
* containing one.
*/
assert!(src.get_reg().is_none());
let node_idx = graph.add_node();
assert!(node_idx == vals.len());
vals.push(src);
node_idx
};
if dst_idx != src_idx {
graph.add_edge(dst_idx, src_idx);
}
}
let mut instrs = Vec::new();
let mut ready = Vec::new();
for i in 0..pc.dsts.len() {
if graph.num_reads(i) == 0 {
ready.push(i);
}
}
while !ready.is_empty() {
let dst_idx = ready.pop().unwrap();
if let Some(src_idx) = graph.src(dst_idx) {
let dst = *vals[dst_idx].as_reg().unwrap();
let src = vals[src_idx];
instrs.push(Instr::new_mov(dst.into(), src.into()));
if graph.del_edge(dst_idx, src_idx) {
ready.push(src_idx);
}
}
}
/* At this point, all we are left with in the graph are isolated nodes
* (no edges) and cycles.
*
* Proof:
*
* Without loss of generality, we can assume that there are no isolated
* nodes in the graph. By construction, no node has in-degree more than 1
* (that would indicate a duplicate destination). The loop above ensures
* that there are no nodes with an in-degree of 1 and an out-degree of 0.
*
* Suppose that there were a node with an in-degree of 0. Then, because no
* node has an in-degree greater than 1, the number of edges must be less
* than the number of nodes. This implies that there is some node N with
* with out-degree of 0. If N has an in-degree of 0 then it is isolated,
* which is a contradiction. If N has an in-degree of 1 then it is a node
* with in-degree of 1 and out-degree of 0 which is also a contradiction.
* Therefore, there are no nodes with in-degree of 0 and all nodes have an
* in-degree of 1.
*
* Since all nodes have an in-degree of 1, no node has an out-degree of 0
* and, because the sum of all in-degrees equals the sum of all out-degrees
* (they both equal the number of edges), every node must also have an
* out-degree of 1. Therefore, the graph only contains cycles.
*
* QED
*/
for i in 0..pc.dsts.len() {
loop {
if let Some(j) = graph.src(i) {
/* We're part of a cycle so j also has a source */
let k = graph.src(j).unwrap();
instrs.push(Instr::new_swap(
*vals[j].as_reg().unwrap(),
*vals[k].as_reg().unwrap(),
));
graph.del_edge(i, j);
graph.del_edge(j, k);
if i != k {
graph.add_edge(i, k);
}
} else {
/* This is an isolated node */
assert!(graph.src(i).is_none() && graph.num_reads(i) == 0);
break;
}
}
}
instrs
}
impl Shader {
    /// Replaces every `OpParCopy` in the shader with the instructions
    /// produced by `lower_par_copy`; other instructions are left untouched.
    ///
    /// Panics if a parallel copy carries a predicate.
    pub fn lower_par_copies(&mut self) {
        self.map_instrs(&|instr, _| -> Vec<Instr> {
            if let Op::ParCopy(pc) = instr.op {
                assert!(instr.pred.is_none());
                lower_par_copy(pc)
            } else {
                vec![instr]
            }
        });
    }
}