mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-10 08:10:14 +01:00
nak: Add a parallel copy instruction with lowering
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
parent
90c098e286
commit
df11e232d6
3 changed files with 313 additions and 0 deletions
|
|
@ -10,6 +10,7 @@ mod nak_encode_sm75;
|
|||
mod nak_from_nir;
|
||||
mod nak_ir;
|
||||
mod nak_legalize;
|
||||
mod nak_lower_par_copies;
|
||||
mod nak_opt_copy_prop;
|
||||
mod nak_opt_dce;
|
||||
mod nir;
|
||||
|
|
@ -331,6 +332,8 @@ pub extern "C" fn nak_compile_shader(
|
|||
|
||||
s.assign_regs_trivial();
|
||||
s.lower_vec_split();
|
||||
s.lower_par_copies();
|
||||
s.lower_swap();
|
||||
s.lower_mov_predicate();
|
||||
s.calc_instr_deps();
|
||||
|
||||
|
|
|
|||
|
|
@ -1405,6 +1405,63 @@ impl fmt::Display for OpPhiDst {
|
|||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpSwap {
|
||||
pub dsts: [Dst; 2],
|
||||
pub srcs: [Src; 2],
|
||||
}
|
||||
|
||||
impl fmt::Display for OpSwap {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"SWAP {{ {} {} }} {{ {} {} }}",
|
||||
self.dsts[0], self.dsts[1], self.srcs[0], self.srcs[1]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct OpParCopy {
|
||||
pub dsts: Vec<Dst>,
|
||||
pub srcs: Vec<Src>,
|
||||
}
|
||||
|
||||
impl SrcsAsSlice for OpParCopy {
|
||||
fn srcs_as_slice(&self) -> &[Src] {
|
||||
&self.srcs
|
||||
}
|
||||
|
||||
fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
|
||||
&mut self.srcs
|
||||
}
|
||||
}
|
||||
|
||||
impl DstsAsSlice for OpParCopy {
|
||||
fn dsts_as_slice(&self) -> &[Dst] {
|
||||
&self.dsts
|
||||
}
|
||||
|
||||
fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
|
||||
&mut self.dsts
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for OpParCopy {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "PAR_COPY {{")?;
|
||||
assert!(self.srcs.len() == self.dsts.len());
|
||||
for i in 0..self.srcs.len() {
|
||||
if i > 0 {
|
||||
write!(f, ",")?;
|
||||
}
|
||||
write!(f, " {} <- {}", self.dsts[i], self.srcs[i])?;
|
||||
}
|
||||
write!(f, " }}")
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(DstsAsSlice)]
|
||||
pub struct OpFSOut {
|
||||
|
|
@ -1457,6 +1514,8 @@ pub enum Op {
|
|||
PhiDst(OpPhiDst),
|
||||
Vec(OpVec),
|
||||
Split(OpSplit),
|
||||
Swap(OpSwap),
|
||||
ParCopy(OpParCopy),
|
||||
FSOut(OpFSOut),
|
||||
}
|
||||
|
||||
|
|
@ -1695,6 +1754,11 @@ impl Instr {
|
|||
}))
|
||||
}
|
||||
|
||||
pub fn new_xor(dst: Dst, x: Src, y: Src) -> Instr {
|
||||
let xor_lop = LogicOp::new_lut(&|x, y, _| x ^ y);
|
||||
Instr::new_lop3(dst, xor_lop, x, y, Src::new_zero())
|
||||
}
|
||||
|
||||
pub fn new_shl(dst: Dst, x: Src, shift: Src) -> Instr {
|
||||
Instr::new(Op::Shl(OpShl {
|
||||
dst: dst,
|
||||
|
|
@ -1815,6 +1879,14 @@ impl Instr {
|
|||
}))
|
||||
}
|
||||
|
||||
pub fn new_swap(x: RegRef, y: RegRef) -> Instr {
|
||||
assert!(x.file() == y.file());
|
||||
Instr::new(Op::Swap(OpSwap {
|
||||
dsts: [x.into(), y.into()],
|
||||
srcs: [y.into(), x.into()],
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn new_fs_out(srcs: &[Src]) -> Instr {
|
||||
Instr::new(Op::FSOut(OpFSOut {
|
||||
srcs: srcs.to_vec(),
|
||||
|
|
@ -1879,6 +1951,8 @@ impl Instr {
|
|||
| Op::PhiDst(_)
|
||||
| Op::Vec(_)
|
||||
| Op::Split(_)
|
||||
| Op::Swap(_)
|
||||
| Op::ParCopy(_)
|
||||
| Op::FSOut(_) => {
|
||||
panic!("Not a hardware opcode")
|
||||
}
|
||||
|
|
@ -2077,6 +2151,44 @@ impl Shader {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn lower_swap(&mut self) {
|
||||
self.map_instrs(&|instr: Instr, _| -> Vec<Instr> {
|
||||
match instr.op {
|
||||
Op::Swap(swap) => {
|
||||
let x = *swap.dsts[0].as_reg().unwrap();
|
||||
let y = *swap.dsts[1].as_reg().unwrap();
|
||||
|
||||
assert!(x.file() == y.file());
|
||||
assert!(x.comps() == 1 && y.comps() == 1);
|
||||
assert!(swap.srcs[0].src_mod.is_none());
|
||||
assert!(*swap.srcs[0].src_ref.as_reg().unwrap() == y);
|
||||
assert!(swap.srcs[1].src_mod.is_none());
|
||||
assert!(*swap.srcs[1].src_ref.as_reg().unwrap() == x);
|
||||
|
||||
if x == y {
|
||||
Vec::new()
|
||||
} else if x.is_predicate() {
|
||||
vec![Instr::new(Op::PLop3(OpPLop3 {
|
||||
dsts: [x.into(), y.into()],
|
||||
srcs: [x.into(), y.into(), Src::new_imm_bool(true)],
|
||||
ops: [
|
||||
LogicOp::new_lut(&|_, y, _| y),
|
||||
LogicOp::new_lut(&|x, _, _| x),
|
||||
],
|
||||
}))]
|
||||
} else {
|
||||
vec![
|
||||
Instr::new_xor(x.into(), x.into(), y.into()),
|
||||
Instr::new_xor(y.into(), x.into(), y.into()),
|
||||
Instr::new_xor(x.into(), x.into(), y.into()),
|
||||
]
|
||||
}
|
||||
}
|
||||
_ => vec![instr],
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn lower_mov_predicate(&mut self) {
|
||||
self.map_instrs(&|instr: Instr, _| -> Vec<Instr> {
|
||||
match &instr.op {
|
||||
|
|
|
|||
198
src/nouveau/compiler/nak_lower_par_copies.rs
Normal file
198
src/nouveau/compiler/nak_lower_par_copies.rs
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
/*
|
||||
* Copyright © 2022 Collabora, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
use crate::nak_ir::*;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// One value taking part in a parallel copy.
struct CopyNode {
    /// Number of edges that use this node as their source.
    num_reads: usize,
    /// Node this one is copied from, or `None` if it has no incoming edge.
    src: Option<usize>,
}

/// Directed graph of pending copies.
///
/// An edge `dst <- src` means "dst must receive the value of src". Every
/// node has at most one incoming edge and self-loops are not allowed.
struct CopyGraph {
    nodes: Vec<CopyNode>,
}

impl CopyGraph {
    /// Creates an empty graph.
    pub fn new() -> CopyGraph {
        CopyGraph { nodes: Vec::new() }
    }

    /// Appends a node with no edges and returns its index.
    fn add_node(&mut self) -> usize {
        let node_idx = self.nodes.len();
        self.nodes.push(CopyNode {
            num_reads: 0,
            src: None,
        });
        node_idx
    }

    /// Returns how many edges currently read from `node_idx`.
    fn num_reads(&self, node_idx: usize) -> usize {
        self.nodes[node_idx].num_reads
    }

    /// Returns the node `node_idx` is copied from, if it has one.
    fn src(&self, node_idx: usize) -> Option<usize> {
        self.nodes[node_idx].src
    }

    /// Adds the edge `dst_idx <- src_idx`.
    ///
    /// Panics on a self-loop or if `dst_idx` already has a source.
    fn add_edge(&mut self, dst_idx: usize, src_idx: usize) {
        /* Disallow self-loops */
        assert!(dst_idx != src_idx);

        /* Each node has in-degree at most 1 */
        assert!(self.nodes[dst_idx].src.is_none());
        self.nodes[dst_idx].src = Some(src_idx);
        self.nodes[src_idx].num_reads += 1;
    }

    /// Removes the edge `dst_idx <- src_idx` and returns `true` if
    /// `src_idx` is left with no readers.
    ///
    /// Panics if that edge does not exist.
    fn del_edge(&mut self, dst_idx: usize, src_idx: usize) -> bool {
        /* Checking the actual source, not just that one exists, keeps a
         * mismatched pair from decrementing an unrelated node's count.
         */
        assert!(self.nodes[dst_idx].src == Some(src_idx));
        self.nodes[dst_idx].src = None;
        self.nodes[src_idx].num_reads -= 1;
        self.nodes[src_idx].num_reads == 0
    }
}
|
||||
|
||||
fn lower_par_copy(pc: OpParCopy) -> Vec<Instr> {
|
||||
let mut graph = CopyGraph::new();
|
||||
let mut vals = Vec::new();
|
||||
let mut reg_to_idx = HashMap::new();
|
||||
|
||||
for (i, dst) in pc.dsts.iter().enumerate() {
|
||||
/* Destinations must be pairwise unique */
|
||||
let reg = dst.as_reg().unwrap();
|
||||
assert!(reg_to_idx.get(reg).is_none());
|
||||
|
||||
/* Everything must be scalar */
|
||||
assert!(reg.comps() == 1);
|
||||
|
||||
let node_idx = graph.add_node();
|
||||
assert!(node_idx == i && vals.len() == i);
|
||||
vals.push(SrcRef::from(*reg));
|
||||
reg_to_idx.insert(*reg, i);
|
||||
}
|
||||
|
||||
for (dst_idx, src) in pc.srcs.iter().enumerate() {
|
||||
assert!(src.src_mod.is_none());
|
||||
let src = src.src_ref;
|
||||
|
||||
let src_idx = if let SrcRef::Reg(reg) = src {
|
||||
/* Everything must be scalar */
|
||||
assert!(reg.comps() == 1);
|
||||
|
||||
*reg_to_idx.entry(reg).or_insert_with(|| {
|
||||
let node_idx = graph.add_node();
|
||||
assert!(node_idx == vals.len());
|
||||
vals.push(src);
|
||||
node_idx
|
||||
})
|
||||
} else {
|
||||
/* We can't have bindless CBufs because we can't resolve cycles
|
||||
* containing one.
|
||||
*/
|
||||
assert!(src.get_reg().is_none());
|
||||
|
||||
let node_idx = graph.add_node();
|
||||
assert!(node_idx == vals.len());
|
||||
vals.push(src);
|
||||
node_idx
|
||||
};
|
||||
|
||||
if dst_idx != src_idx {
|
||||
graph.add_edge(dst_idx, src_idx);
|
||||
}
|
||||
}
|
||||
|
||||
let mut instrs = Vec::new();
|
||||
|
||||
let mut ready = Vec::new();
|
||||
for i in 0..pc.dsts.len() {
|
||||
if graph.num_reads(i) == 0 {
|
||||
ready.push(i);
|
||||
}
|
||||
}
|
||||
while !ready.is_empty() {
|
||||
let dst_idx = ready.pop().unwrap();
|
||||
if let Some(src_idx) = graph.src(dst_idx) {
|
||||
let dst = *vals[dst_idx].as_reg().unwrap();
|
||||
let src = vals[src_idx];
|
||||
instrs.push(Instr::new_mov(dst.into(), src.into()));
|
||||
if graph.del_edge(dst_idx, src_idx) {
|
||||
ready.push(src_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* At this point, all we are left with in the graph are isolated nodes
|
||||
* (no edges) and cycles.
|
||||
*
|
||||
* Proof:
|
||||
*
|
||||
* Without loss of generality, we can assume that there are no isolated
|
||||
* nodes in the graph. By construction, no node has in-degree more than 1
|
||||
* (that would indicate a duplicate destination). The loop above ensures
|
||||
* that there are no nodes with an in-degree of 1 and an out-degree of 0.
|
||||
*
|
||||
* Suppose that there were a node with an in-degree of 0. Then, because no
|
||||
* node has an in-degree greater than 1, the number of edges must be less
|
||||
* than the number of nodes. This implies that there is some node N with
|
||||
* with out-degree of 0. If N has an in-degree of 0 then it is isolated,
|
||||
* which is a contradiction. If N has an in-degree of 1 then it is a node
|
||||
* with in-degree of 1 and out-degree of 0 which is also a contradiction.
|
||||
* Therefore, there are no nodes with in-degree of 0 and all nodes have an
|
||||
* in-degree of 1.
|
||||
*
|
||||
* Since all nodes have an in-degree of 1, no node has an out-degree of 0
|
||||
* and, because the sum of all in-degrees equals the sum of all out-degrees
|
||||
* (they both equal the number of edges), every node must also have an
|
||||
* out-degree of 1. Therefore, the graph only contains cycles.
|
||||
*
|
||||
* QED
|
||||
*/
|
||||
for i in 0..pc.dsts.len() {
|
||||
loop {
|
||||
if let Some(j) = graph.src(i) {
|
||||
/* We're part of a cycle so j also has a source */
|
||||
let k = graph.src(j).unwrap();
|
||||
|
||||
instrs.push(Instr::new_swap(
|
||||
*vals[j].as_reg().unwrap(),
|
||||
*vals[k].as_reg().unwrap(),
|
||||
));
|
||||
|
||||
graph.del_edge(i, j);
|
||||
graph.del_edge(j, k);
|
||||
if i != k {
|
||||
graph.add_edge(i, k);
|
||||
}
|
||||
} else {
|
||||
/* This is an isolated node */
|
||||
assert!(graph.src(i).is_none() && graph.num_reads(i) == 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
instrs
|
||||
}
|
||||
|
||||
impl Shader {
|
||||
pub fn lower_par_copies(&mut self) {
|
||||
self.map_instrs(&|instr, _| -> Vec<Instr> {
|
||||
match instr.op {
|
||||
Op::ParCopy(pc) => {
|
||||
assert!(instr.pred.is_none());
|
||||
lower_par_copy(pc)
|
||||
}
|
||||
_ => vec![instr],
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue