nak: Rework vector handling

Previously, we handled vectors by giving each SSAValue a number of
components which we assume matches in all uses.  To deal with swizzles
and component selection, we had OpVec and OpSplit instructions to
convert vectors to/from scalars as needed.  This is fine as an SSA
representation but it leads to a lot of redundant values when it comes
time to assign registers.  There are strategies for dealing with this,
such as ensuring that splits always kill the whole vector and then
re-combining into a new vector for later uses.  Doing this makes it
possible to ensure that each component only ever exists exactly once,
at the cost of a LOT of vec/split instructions.
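
As an illustration, a minimal sketch of the old shape, with the
Dst/Src wrapper types simplified away (the real OpVec/OpSplit
definitions are the ones removed from the IR file later in this diff):

    // Old model: an SSAValue is a whole vector and OpVec/OpSplit
    // convert between vectors and scalars, so a swizzled component
    // exists once inside the vector and again as its own scalar.
    pub struct SSAValue {
        pub idx: u32,
        pub comps: u8, // component count, assumed to match in all uses
    }

    pub enum Op {
        // Gather scalar srcs into a comps-wide vector dst
        Vec { dst: SSAValue, srcs: Vec<SSAValue> },
        // Scatter the vector src out into scalar dsts
        Split { dsts: Vec<SSAValue>, src: SSAValue },
    }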

Another possible solution is to naively emit vec/split but teach
liveness analysis and RA about the duplicated values.  Instead of RA
working on individual SSA values, it can work on equivalence classes of
components.
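
That approach isn't taken here, but a hypothetical sketch makes it
concrete: a union-find over component indices, where every component
copied by a vec/split unions its source and destination so RA sees a
single class per physical value:

    // Hypothetical: equivalence classes of components via union-find.
    struct CompClasses {
        parent: Vec<u32>,
    }

    impl CompClasses {
        fn find(&mut self, a: u32) -> u32 {
            let p = self.parent[a as usize];
            if p == a {
                a
            } else {
                let root = self.find(p);
                self.parent[a as usize] = root; // path compression
                root
            }
        }

        // Called once per component copied by an OpVec or OpSplit.
        fn union(&mut self, a: u32, b: u32) {
            let (ra, rb) = (self.find(a), self.find(b));
            self.parent[ra as usize] = rb;
        }
    }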

This takes a different (and currently novel to Mesa) approach of making
each SSAValue a single component but having an SSARef type which can
reference up to 4 SSAValues as a vector.  Register allocation then works
on individual components and only ensures that the components of a
vector are contiguous when it's used as a vector.  This isn't very
different from how it worked before.  If anything, it's a bit more
straightforward now because the component/vector split uses the same
types as the rest of the IR and the separate SSAComp type is gone.
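
For reference, the heart of the new encoding, condensed from the
SSARef code added to the IR file later in this diff (derives and
helpers trimmed):

    struct SSAValue {
        packed: u32, // 0 is reserved for SSAValue::NONE
    }

    struct SSARef {
        v: [SSAValue; 4],
    }

    impl SSARef {
        fn comps(&self) -> u8 {
            // Lengths 1..=3 are stored negated in the unused fourth
            // slot; real SSAValue indices are capped below
            // u32::MAX - 2, so the two cases can never collide.
            if self.v[3].packed >= u32::MAX - 2 {
                self.v[3].packed.wrapping_neg() as u8
            } else {
                4
            }
        }
    }

The real type derives Copy, so vector references still pass by value
everywhere a scalar SSAValue used to.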

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
Faith Ekstrand 2023-04-20 12:58:03 -05:00 committed by Marge Bot
parent a9e79994a7
commit 9ef47c8921
9 changed files with 692 additions and 852 deletions

View file

@ -445,7 +445,6 @@ pub extern "C" fn nak_compile_shader(
}
s.assign_regs();
//s.assign_regs_trivial();
if DEBUG.print() {
println!("NAK IR:\n{}", &s);
}

View file

@ -41,36 +41,24 @@ impl KillSet {
self.set.contains(ssa)
}
pub fn contains_all(&self, slice: &[SSAValue]) -> bool {
for ssa in slice {
if !self.contains(ssa) {
return false;
}
}
true
}
pub fn iter(&self) -> std::slice::Iter<'_, SSAValue> {
self.vec.iter()
}
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
struct PhiComp {
v: SSAValue,
}
impl PhiComp {
pub fn new(idx: u32, comp: u8) -> PhiComp {
PhiComp {
v: SSAValue::new(RegFile::GPR, idx, comp + 1),
}
}
pub fn idx(&self) -> u32 {
self.v.idx()
}
pub fn comp(&self) -> u8 {
self.v.comps() - 1
}
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
enum LiveRef {
SSA(SSAComp),
Phi(PhiComp),
SSA(SSAValue),
Phi(u32),
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
@ -107,8 +95,8 @@ struct RegFileAllocation {
max_reg: u8,
used: BitSet,
pinned: BitSet,
reg_ssa: Vec<SSAComp>,
ssa_reg: HashMap<SSAComp, u8>,
reg_ssa: Vec<SSAValue>,
ssa_reg: HashMap<SSAValue, u8>,
}
impl RegFileAllocation {
@ -141,11 +129,11 @@ impl RegFileAllocation {
}
}
pub fn get_reg_comp(&self, ssa: SSAComp) -> u8 {
pub fn get_reg(&self, ssa: SSAValue) -> u8 {
*self.ssa_reg.get(&ssa).unwrap()
}
pub fn get_ssa_comp(&self, reg: u8) -> Option<SSAComp> {
pub fn get_ssa(&self, reg: u8) -> Option<SSAValue> {
if self.used.get(reg.into()) {
Some(self.reg_ssa[usize::from(reg)])
} else {
@ -153,12 +141,12 @@ impl RegFileAllocation {
}
}
pub fn try_get_reg(&self, ssa: SSAValue) -> Option<u8> {
let align = ssa.comps().next_power_of_two();
let reg = self.get_reg_comp(ssa.comp(0));
pub fn try_get_vec_reg(&self, vec: SSARef) -> Option<u8> {
let align = vec.comps().next_power_of_two();
let reg = self.get_reg(vec[0]);
if reg % align == 0 {
for i in 1..ssa.comps() {
if self.get_reg_comp(ssa.comp(i)) != reg + i {
for i in 1..vec.comps() {
if self.get_reg(vec[usize::from(i)]) != reg + i {
return None;
}
}
@ -168,7 +156,7 @@ impl RegFileAllocation {
}
}
pub fn free_ssa_comp(&mut self, ssa: SSAComp) -> u8 {
pub fn free_ssa(&mut self, ssa: SSAValue) -> u8 {
assert!(ssa.file() == self.file);
let reg = self.ssa_reg.remove(&ssa).unwrap();
assert!(self.used.get(reg.into()));
@ -176,9 +164,9 @@ impl RegFileAllocation {
reg
}
pub fn free_ssa(&mut self, ssa: SSAValue) {
for i in 0..ssa.comps() {
self.free_ssa_comp(ssa.comp(i));
pub fn free_ssa_ref(&mut self, ssa_ref: SSARef) {
for ssa in ssa_ref.iter() {
self.free_ssa(*ssa);
}
}
@ -190,14 +178,13 @@ impl RegFileAllocation {
}
}
pub fn assign_reg_comp(&mut self, ssa: SSAComp, reg: u8) -> RegRef {
pub fn assign_reg(&mut self, ssa: SSAValue, reg: u8) -> RegRef {
assert!(ssa.file() == self.file);
assert!(reg <= self.max_reg);
assert!(!self.used.get(reg.into()));
if usize::from(reg) >= self.reg_ssa.len() {
self.reg_ssa
.resize(usize::from(reg) + 1, SSAComp::new(RegFile::GPR, 0, 0));
self.reg_ssa.resize(usize::from(reg) + 1, SSAValue::NONE);
}
self.reg_ssa[usize::from(reg)] = ssa;
self.ssa_reg.insert(ssa, reg);
@ -207,14 +194,18 @@ impl RegFileAllocation {
RegRef::new(self.file, reg, 1)
}
pub fn assign_reg(&mut self, ssa: SSAValue, reg: u8) -> RegRef {
pub fn assign_vec_reg(&mut self, ssa: SSARef, reg: u8) -> RegRef {
for i in 0..ssa.comps() {
self.assign_reg_comp(ssa.comp(i), reg + i);
self.assign_reg(ssa[usize::from(i)], reg + i);
}
RegRef::new(self.file, reg, ssa.comps())
}
pub fn try_assign_reg(&mut self, ssa: SSAValue, reg: u8) -> Option<RegRef> {
pub fn try_assign_vec_reg(
&mut self,
ssa: SSARef,
reg: u8,
) -> Option<RegRef> {
if ssa.file() != self.file() {
return None;
}
@ -226,10 +217,14 @@ impl RegFileAllocation {
return None;
}
}
Some(self.assign_reg(ssa, reg))
Some(self.assign_vec_reg(ssa, reg))
}
pub fn try_find_unused_reg(&self, start_reg: u8, comps: u8) -> Option<u8> {
pub fn try_find_unused_reg_range(
&self,
start_reg: u8,
comps: u8,
) -> Option<u8> {
assert!(comps > 0);
let comps_mask = u32::MAX >> (32 - comps);
let align = comps.next_power_of_two();
@ -271,6 +266,7 @@ impl RegFileAllocation {
}
if let Ok(reg) = u8::try_from(self.used.words().len() * 32) {
let reg = max(start_reg, reg);
if self.is_reg_in_bounds(reg, comps) {
Some(reg)
} else {
@ -281,36 +277,39 @@ impl RegFileAllocation {
}
}
fn get_reg_near_reg(&self, reg: u8, comps: u8) -> u8 {
fn try_find_unpinned_reg_range(
&self,
start_reg: u8,
comps: u8,
) -> Option<u8> {
let align = comps.next_power_of_two();
/* Pick something properly aligned near component 0 */
let mut reg = reg & (align - 1);
if !self.is_reg_in_bounds(reg, comps) {
reg -= align;
}
reg
}
pub fn get_reg_near_ssa(&self, ssa: SSAValue) -> u8 {
/* Get something near component 0 */
self.get_reg_near_reg(self.get_reg_comp(ssa.comp(0)), ssa.comps())
}
pub fn get_any_reg(&self, comps: u8) -> u8 {
let mut pick_comps = comps;
while pick_comps > 0 {
if let Some(reg) = self.try_find_unused_reg(0, pick_comps) {
return self.get_reg_near_reg(reg, comps);
let mut reg = start_reg.next_multiple_of(align);
while self.is_reg_in_bounds(reg, comps) {
let mut is_pinned = false;
for i in 0..comps {
if self.pinned.get((reg + i).into()) {
is_pinned = true;
break;
}
}
pick_comps = pick_comps >> 1;
if !is_pinned {
return Some(reg);
}
reg += align;
}
panic!("Failed to find any free registers");
None
}
pub fn get_scalar(&mut self, ssa: SSAComp) -> RegRef {
pub fn try_find_unpinned_reg_near_ssa(&self, ssa: SSARef) -> Option<u8> {
/* Get something near component 0 */
self.try_find_unpinned_reg_range(self.get_reg(ssa[0]), ssa.comps())
}
pub fn get_scalar(&mut self, ssa: SSAValue) -> RegRef {
assert!(ssa.file() == self.file);
let reg = self.get_reg_comp(ssa);
let reg = self.get_reg(ssa);
self.pinned.insert(reg.into());
RegRef::new(self.file, reg, 1)
}
@ -318,54 +317,79 @@ impl RegFileAllocation {
pub fn move_to_reg(
&mut self,
pcopy: &mut OpParCopy,
ssa: SSAValue,
ssa: SSARef,
reg: u8,
) -> RegRef {
for c in 0..ssa.comps() {
let old_reg = self.get_reg_comp(ssa.comp(c));
let old_reg = self.get_reg(ssa[usize::from(c)]);
if old_reg == reg + c {
continue;
}
self.free_ssa_comp(ssa.comp(c));
self.free_ssa(ssa[usize::from(c)]);
/* If something already exists in the destination, swap it to the
* source.
*/
if let Some(evicted) = self.get_ssa_comp(reg + c) {
self.free_ssa_comp(evicted);
if let Some(evicted) = self.get_ssa(reg + c) {
self.free_ssa(evicted);
pcopy.srcs.push(RegRef::new(self.file, reg + c, 1).into());
pcopy.dsts.push(RegRef::new(self.file, old_reg, 1).into());
self.assign_reg_comp(evicted, old_reg);
self.assign_reg(evicted, old_reg);
}
pcopy.srcs.push(RegRef::new(self.file, old_reg, 1).into());
pcopy.dsts.push(RegRef::new(self.file, reg + c, 1).into());
self.assign_reg_comp(ssa.comp(c), reg + c);
self.assign_reg(ssa[usize::from(c)], reg + c);
}
RegRef::new(self.file, reg, ssa.comps())
}
pub fn get_vector(
&mut self,
pcopy: &mut OpParCopy,
ssa: SSAValue,
) -> RegRef {
let reg = if let Some(reg) = self.try_get_reg(ssa) {
reg
} else if let Some(reg) = self.try_find_unused_reg(0, ssa.comps()) {
reg
} else {
self.get_reg_near_ssa(ssa)
};
pub fn get_vector(&mut self, pcopy: &mut OpParCopy, ssa: SSARef) -> RegRef {
let reg = self
.try_get_vec_reg(ssa)
.or_else(|| self.try_find_unused_reg_range(0, ssa.comps()))
.or_else(|| self.try_find_unpinned_reg_near_ssa(ssa))
.or_else(|| self.try_find_unpinned_reg_range(0, ssa.comps()))
.expect("Failed to find an unpinned register range");
for c in 0..ssa.comps() {
self.pinned.insert((reg + c).into());
}
self.move_to_reg(pcopy, ssa, reg)
}
pub fn alloc_scalar(&mut self, ssa: SSAComp) -> RegRef {
let reg = self.try_find_unused_reg(0, 1).unwrap();
self.assign_reg_comp(ssa, reg)
pub fn alloc_scalar(&mut self, ssa: SSAValue) -> RegRef {
let reg = self.try_find_unused_reg_range(0, 1).unwrap();
self.assign_reg(ssa, reg)
}
pub fn alloc_vector(
&mut self,
pcopy: &mut OpParCopy,
ssa: SSARef,
) -> RegRef {
let reg = self
.try_find_unused_reg_range(0, ssa.comps())
.or_else(|| self.try_find_unpinned_reg_range(0, ssa.comps()))
.expect("Failed to find an unpinned register range");
for c in 0..ssa.comps() {
self.pinned.insert((reg + c).into());
}
for c in 0..ssa.comps() {
if let Some(evicted) = self.get_ssa(reg + c) {
self.free_ssa(evicted);
let new_reg = self.try_find_unused_reg_range(0, 1).unwrap();
pcopy.srcs.push(RegRef::new(self.file, reg + c, 1).into());
pcopy.dsts.push(RegRef::new(self.file, new_reg, 1).into());
self.assign_reg(evicted, new_reg);
}
}
self.assign_vec_reg(ssa, reg)
}
}
@ -376,7 +400,7 @@ fn instr_remap_srcs_file(
) {
if let Pred::SSA(pred) = instr.pred {
if pred.file() == ra.file() {
instr.pred = ra.get_scalar(pred.as_comp()).into();
instr.pred = ra.get_scalar(pred).into();
}
}
@ -392,8 +416,9 @@ fn instr_remap_srcs_file(
fn instr_alloc_scalar_dsts_file(instr: &mut Instr, ra: &mut RegFileAllocation) {
for dst in instr.dsts_mut() {
if let Dst::SSA(ssa) = dst {
assert!(ssa.comps() == 1);
if ssa.file() == ra.file() {
*dst = ra.alloc_scalar(ssa.as_comp()).into();
*dst = ra.alloc_scalar(ssa[0]).into();
}
}
}
@ -408,7 +433,7 @@ fn instr_assign_regs_file(
struct VecDst {
dst_idx: usize,
comps: u8,
killed: Option<SSAValue>,
killed: Option<SSARef>,
reg: u8,
}
@ -446,12 +471,25 @@ fn instr_assign_regs_file(
*/
assert!(!ra.file().is_predicate());
let mut avail = killed.set.clone();
let mut killed_vecs = Vec::new();
let mut killed_vec_comps = 0;
for ssa in killed.iter() {
if ssa.file() == ra.file() && ssa.comps() > 1 {
killed_vecs.push(ssa);
killed_vec_comps += ssa.comps();
for src in instr.srcs() {
if let SrcRef::SSA(vec) = src.src_ref {
if vec.comps() > 1 {
let mut vec_killed = true;
for ssa in vec.iter() {
if ssa.file() != ra.file() || !avail.contains(ssa) {
vec_killed = false;
break;
}
}
if vec_killed {
for ssa in vec.iter() {
avail.remove(ssa);
}
killed_vecs.push(vec);
}
}
}
}
@ -465,7 +503,7 @@ fn instr_assign_regs_file(
while !killed_vecs.is_empty() {
let src = killed_vecs.pop().unwrap();
if src.comps() >= vec_dst.comps {
vec_dst.killed = Some(*src);
vec_dst.killed = Some(src);
break;
}
}
@ -473,7 +511,9 @@ fn instr_assign_regs_file(
vec_dsts_map_to_killed_srcs = false;
}
if let Some(reg) = ra.try_find_unused_reg(next_dst_reg, vec_dst.comps) {
if let Some(reg) =
ra.try_find_unused_reg_range(next_dst_reg, vec_dst.comps)
{
vec_dst.reg = reg;
next_dst_reg = reg + vec_dst.comps;
} else {
@ -487,76 +527,41 @@ fn instr_assign_regs_file(
instr_remap_srcs_file(instr, pcopy, ra);
for vec_dst in &mut vec_dsts {
vec_dst.reg = ra.try_get_reg(vec_dst.killed.unwrap()).unwrap();
vec_dst.reg = ra.try_get_vec_reg(vec_dst.killed.unwrap()).unwrap();
}
ra.free_killed(killed);
for vec_dst in vec_dsts {
let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
*dst = ra.assign_reg(*dst.as_ssa().unwrap(), vec_dst.reg).into();
*dst = ra
.assign_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
.into();
}
instr_alloc_scalar_dsts_file(instr, ra);
} else if could_trivially_allocate {
for vec_dst in vec_dsts {
let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
*dst = ra.assign_reg(*dst.as_ssa().unwrap(), vec_dst.reg).into();
*dst = ra
.assign_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
.into();
}
instr_remap_srcs_file(instr, pcopy, ra);
ra.free_killed(killed);
instr_alloc_scalar_dsts_file(instr, ra);
} else {
/* We're all out of tricks. We need to allocate enough space for all
* the vector destinations and all the killed SSA values and shuffle the
* killed values into the new space.
*/
let vec_comps = max(killed_vec_comps, vec_dst_comps);
let vec_reg = ra.get_any_reg(vec_comps);
let mut ssa_reg = HashMap::new();
let mut src_vec_reg = vec_reg;
for src in instr.srcs_mut() {
if let SrcRef::SSA(ssa) = src.src_ref {
if ssa.file() == ra.file() {
if killed.contains(&ssa) && ssa.comps() > 1 {
let reg = *ssa_reg.entry(ssa).or_insert_with(|| {
let align = ssa.comps().next_power_of_two();
let reg = src_vec_reg;
src_vec_reg += ssa.comps();
/* We assume vector sources are in order of
* decreasing alignment. This is true for texture
* opcodes which should be the only interesting
* case.
*/
assert!(reg % align == 0);
ra.move_to_reg(pcopy, ssa, reg)
});
src.src_ref = reg.into();
}
}
}
}
/* Handle the scalar and non-killed sources */
instr_remap_srcs_file(instr, pcopy, ra);
ra.free_killed(killed);
let mut dst_vec_reg = vec_reg;
/* Allocate vector destinations first so we have the most freedom.
* Scalar destinations can fill in holes.
*/
for dst in instr.dsts_mut() {
if let Dst::SSA(ssa) = dst {
if ssa.comps() > 1 {
let align = ssa.comps().next_power_of_two();
let reg = dst_vec_reg;
dst_vec_reg += ssa.comps();
/* We assume vector destinations are in order of decreasing
* alignment. This is true for texture opcodes which should
* be the only interesting case.
*/
assert!(reg % align == 0);
*dst = ra.assign_reg(*ssa, reg).into();
if ssa.file() == ra.file() && ssa.comps() > 1 {
*dst = ra.alloc_vector(pcopy, *ssa).into();
}
}
}
@ -570,7 +575,7 @@ fn instr_assign_regs_file(
#[derive(Clone)]
struct RegAllocation {
files: [RegFileAllocation; 4],
phi_ssa: HashMap<u32, SSAValue>,
phi_ssa: HashMap<u32, SSARef>,
}
impl RegAllocation {
@ -608,17 +613,21 @@ impl RegAllocation {
self.file_mut(ssa.file()).free_ssa(ssa);
}
pub fn free_ssa_ref(&mut self, ssa: SSARef) {
self.file_mut(ssa.file()).free_ssa_ref(ssa);
}
pub fn free_killed(&mut self, killed: &KillSet) {
for ssa in killed.iter() {
self.free_ssa(*ssa);
}
}
pub fn get_scalar(&mut self, ssa: SSAComp) -> RegRef {
pub fn get_scalar(&mut self, ssa: SSAValue) -> RegRef {
self.file_mut(ssa.file()).get_scalar(ssa)
}
pub fn alloc_scalar(&mut self, ssa: SSAComp) -> RegRef {
pub fn alloc_scalar(&mut self, ssa: SSAValue) -> RegRef {
self.file_mut(ssa.file()).alloc_scalar(ssa)
}
}
@ -626,7 +635,7 @@ impl RegAllocation {
struct AssignRegsBlock {
ra: RegAllocation,
live_in: Vec<LiveValue>,
phi_out: HashMap<PhiComp, SrcRef>,
phi_out: HashMap<u32, SrcRef>,
}
impl AssignRegsBlock {
@ -638,58 +647,6 @@ impl AssignRegsBlock {
}
}
fn assign_regs_split(
&mut self,
split: &OpSplit,
killed: &KillSet,
pcopy: &mut OpParCopy,
) {
let src = split.src.src_ref.as_ssa().unwrap();
let comps = src.comps();
assert!(usize::from(comps) == split.dsts.len());
let mut coalesced = BitSet::new();
if killed.contains(src) {
for c in 0..comps {
/* Free the component regardless of any dest checks */
let src_ra = self.ra.file_mut(src.file());
let reg = src_ra.free_ssa_comp(src.comp(c));
let src_ref = RegRef::new(src.file(), reg, 1);
/* If we have an OpSplit which kills its source, we can coalesce
* on the spot into the destinations.
*/
if let Dst::SSA(dst) = &split.dsts[usize::from(c)] {
if dst.file() == src.file() {
/* Assign destinations to source components when the
* register files match.
*/
let dst_ra = src_ra;
dst_ra.assign_reg_comp(dst.as_comp(), reg);
coalesced.insert(c.into());
} else {
/* Otherwise, they come from different files so
* allocating a destination register won't affect the
* source and it's okay to alloc before we've finished
* freeing the source.
*/
let dst_ra = self.ra.file_mut(dst.file());
let dst_ref = dst_ra.alloc_scalar(dst.as_comp());
pcopy.srcs.push(src_ref.into());
pcopy.dsts.push(dst_ref.into());
}
}
}
} else {
for c in 0..comps {
if let Dst::SSA(dst) = &split.dsts[usize::from(c)] {
pcopy.srcs.push(self.ra.get_scalar(src.comp(c)).into());
pcopy.dsts.push(self.ra.alloc_scalar(dst.as_comp()).into());
}
}
}
}
fn assign_regs_instr(
&mut self,
mut instr: Instr,
@ -697,22 +654,15 @@ impl AssignRegsBlock {
pcopy: &mut OpParCopy,
) -> Option<Instr> {
match &instr.op {
Op::Split(split) => {
assert!(instr.pred.is_none());
assert!(split.src.src_mod.is_none());
self.assign_regs_split(split, killed, pcopy);
None
}
Op::PhiSrcs(phi) => {
for (id, src) in phi.iter() {
assert!(src.src_mod.is_none());
if let SrcRef::SSA(ssa) = src.src_ref {
for c in 0..ssa.comps() {
let src = self.ra.get_scalar(ssa.comp(c)).into();
self.phi_out.insert(PhiComp::new(*id, 0), src);
}
assert!(ssa.comps() == 1);
let src = self.ra.get_scalar(ssa[0]).into();
self.phi_out.insert(*id, src);
} else {
self.phi_out.insert(PhiComp::new(*id, 0), src.src_ref);
self.phi_out.insert(*id, src.src_ref);
}
}
self.ra.free_killed(killed);
@ -723,12 +673,11 @@ impl AssignRegsBlock {
for (id, dst) in phi.iter() {
if let Dst::SSA(ssa) = dst {
for c in 0..ssa.comps() {
self.live_in.push(LiveValue {
live_ref: LiveRef::Phi(PhiComp::new(*id, c)),
reg_ref: self.ra.alloc_scalar(ssa.as_comp()),
});
}
assert!(ssa.comps() == 1);
self.live_in.push(LiveValue {
live_ref: LiveRef::Phi(*id),
reg_ref: self.ra.alloc_scalar(ssa[0]),
});
}
}
@ -748,9 +697,9 @@ impl AssignRegsBlock {
* live in when we process the OpPhiDst, if any.
*/
for raf in &self.ra.files {
for (comp, reg) in &raf.ssa_reg {
for (ssa, reg) in &raf.ssa_reg {
self.live_in.push(LiveValue {
live_ref: LiveRef::SSA(*comp),
live_ref: LiveRef::SSA(*ssa),
reg_ref: RegRef::new(raf.file(), *reg, 1),
});
}
@ -768,7 +717,7 @@ impl AssignRegsBlock {
}
}
for src in instr.srcs() {
if let SrcRef::SSA(ssa) = &src.src_ref {
for ssa in src.iter_ssa() {
if !bl.is_live_after(ssa, ip) {
killed.insert(*ssa);
}
@ -800,7 +749,7 @@ impl AssignRegsBlock {
for lv in &target.live_in {
let src = match lv.live_ref {
LiveRef::SSA(ssa) => {
let reg = self.ra.file(ssa.file()).get_reg_comp(ssa);
let reg = self.ra.file(ssa.file()).get_reg(ssa);
SrcRef::from(RegRef::new(ssa.file(), reg, 1))
}
LiveRef::Phi(phi) => *self.phi_out.get(&phi).unwrap(),
@ -874,143 +823,3 @@ impl Shader {
}
}
}
struct TrivialRegAlloc {
next_reg: u8,
next_ureg: u8,
next_pred: u8,
next_upred: u8,
reg_map: HashMap<SSAValue, RegRef>,
phi_map: HashMap<u32, RegRef>,
}
impl TrivialRegAlloc {
pub fn new() -> TrivialRegAlloc {
TrivialRegAlloc {
next_reg: 16, /* Leave some space for FS outputs */
next_ureg: 0,
next_pred: 0,
next_upred: 0,
reg_map: HashMap::new(),
phi_map: HashMap::new(),
}
}
fn alloc_reg(&mut self, file: RegFile, comps: u8) -> RegRef {
let align = comps.next_power_of_two();
let idx = match file {
RegFile::GPR => {
let idx = self.next_reg.next_multiple_of(align);
self.next_reg = idx + comps;
idx
}
RegFile::UGPR => {
let idx = self.next_ureg.next_multiple_of(align);
self.next_ureg = idx + comps;
idx
}
RegFile::Pred => {
let idx = self.next_pred.next_multiple_of(align);
self.next_pred = idx + comps;
idx
}
RegFile::UPred => {
let idx = self.next_upred.next_multiple_of(align);
self.next_upred = idx + comps;
idx
}
};
RegRef::new(file, idx, comps)
}
fn alloc_ssa(&mut self, ssa: SSAValue) -> RegRef {
let reg = self.alloc_reg(ssa.file(), ssa.comps());
let old = self.reg_map.insert(ssa, reg);
assert!(old.is_none());
reg
}
fn get_ssa_reg(&self, ssa: SSAValue) -> RegRef {
*self.reg_map.get(&ssa).unwrap()
}
fn map_src(&self, mut src: Src) -> Src {
if let SrcRef::SSA(ssa) = src.src_ref {
src.src_ref = self.get_ssa_reg(ssa).into();
}
src
}
pub fn do_alloc(&mut self, s: &mut Shader) {
for f in &mut s.functions {
for b in &mut f.blocks {
for instr in &mut b.instrs {
match &instr.op {
Op::PhiDsts(phi) => {
let mut pcopy = OpParCopy::new();
assert!(phi.ids.len() == phi.dsts.len());
for (id, dst) in phi.iter() {
let dst_ssa = dst.as_ssa().unwrap();
let dst_reg = self.alloc_ssa(*dst_ssa);
let src_reg = self
.alloc_reg(dst_ssa.file(), dst_ssa.comps());
self.phi_map.insert(*id, src_reg);
pcopy.srcs.push(src_reg.into());
pcopy.dsts.push(dst_reg.into());
}
instr.op = Op::ParCopy(pcopy);
}
_ => (),
}
}
}
}
for f in &mut s.functions {
for b in &mut f.blocks {
for instr in &mut b.instrs {
match &instr.op {
Op::PhiSrcs(phi) => {
assert!(phi.ids.len() == phi.srcs.len());
instr.op = Op::ParCopy(OpParCopy {
srcs: phi
.srcs
.iter()
.map(|src| self.map_src(*src))
.collect(),
dsts: phi
.ids
.iter()
.map(|id| {
(*self.phi_map.get(id).unwrap()).into()
})
.collect(),
});
}
_ => {
if let Pred::SSA(ssa) = instr.pred {
instr.pred = self.get_ssa_reg(ssa).into();
}
for dst in instr.dsts_mut() {
if let Dst::SSA(ssa) = dst {
*dst = self.alloc_ssa(*ssa).into();
}
}
for src in instr.srcs_mut() {
*src = self.map_src(*src);
}
}
}
}
}
}
}
}
impl Shader {
pub fn assign_regs_trivial(&mut self) {
TrivialRegAlloc::new().do_alloc(self);
}
}

View file

@ -22,8 +22,9 @@ struct ShaderFromNir<'a> {
instrs: Vec<Instr>,
fs_out_regs: Vec<Src>,
end_block_id: u32,
ssa_map: HashMap<u32, Vec<SSAValue>>,
num_phis: u32,
phis: HashMap<u32, u32>,
phi_map: HashMap<(u32, u8), u32>,
}
impl<'a> ShaderFromNir<'a> {
@ -41,32 +42,46 @@ impl<'a> ShaderFromNir<'a> {
instrs: Vec::new(),
fs_out_regs: fs_out_regs,
end_block_id: 0,
ssa_map: HashMap::new(),
num_phis: 0,
phis: HashMap::new(),
phi_map: HashMap::new(),
}
}
pub fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSAValue {
self.func.as_mut().unwrap().ssa_alloc.alloc(file, comps)
fn alloc_ssa(&mut self, file: RegFile) -> SSAValue {
self.func.as_mut().unwrap().ssa_alloc.alloc(file)
}
fn get_ssa(&self, def: &nir_def) -> SSAValue {
if def.bit_size == 1 {
SSAValue::new(RegFile::Pred, def.index, def.num_components)
} else {
assert!(def.bit_size == 32 || def.bit_size == 64);
let dwords = (def.bit_size / 32) * def.num_components;
//Src::new_ssa(def.index, dwords, !def.divergent)
SSAValue::new(RegFile::GPR, def.index, dwords)
fn get_ssa(&mut self, def: &nir_def) -> &[SSAValue] {
self.ssa_map.entry(def.index).or_insert_with(|| {
let (file, comps) = if def.bit_size == 1 {
(RegFile::Pred, def.num_components)
} else {
assert!(def.bit_size == 32 || def.bit_size == 64);
let comps = (def.bit_size / 32) * def.num_components;
(RegFile::GPR, comps)
};
let mut vec = Vec::new();
for i in 0..comps {
vec.push(self.func.as_mut().unwrap().ssa_alloc.alloc(file))
}
vec
})
}
fn get_ssa_comp(&mut self, def: &nir_def, c: u8) -> SSARef {
let vec = self.get_ssa(def);
match def.bit_size {
1 | 32 => vec[usize::from(c)].into(),
64 => [vec[usize::from(c) * 2], vec[usize::from(c) * 2 + 1]].into(),
_ => panic!("Unsupported bit size"),
}
}
fn get_src(&self, src: &nir_src) -> Src {
self.get_ssa(&src.as_def()).into()
}
fn get_dst(&self, dst: &nir_def) -> Dst {
self.get_ssa(dst).into()
fn get_src(&mut self, src: &nir_src) -> Src {
SSARef::try_from(self.get_ssa(&src.as_def()))
.unwrap()
.into()
}
fn get_io_addr_offset(
@ -81,85 +96,70 @@ impl<'a> ShaderFromNir<'a> {
if let Some(base_def) = std::ptr::NonNull::new(addr_offset.base.def) {
let base_def = unsafe { base_def.as_ref() };
assert!(addr_offset.base.comp == 0);
let base = self.get_ssa(base_def);
let base_comp = u8::try_from(addr_offset.base.comp).unwrap();
let base = self.get_ssa_comp(base_def, base_comp);
(base.into(), addr_offset.offset)
} else {
(SrcRef::Zero.into(), addr_offset.offset)
}
}
fn get_alu_src(&mut self, alu_src: &nir_alu_src) -> Src {
if alu_src.src.num_components() == 1 {
self.get_src(&alu_src.src)
} else {
assert!(alu_src.src.bit_size() == 32);
let vec_src = self.get_src(&alu_src.src);
let comp =
self.alloc_ssa(vec_src.src_ref.as_ssa().unwrap().file(), 1);
let mut dsts = Vec::new();
for c in 0..alu_src.src.num_components() {
if c == alu_src.swizzle[0] {
dsts.push(comp.into());
} else {
dsts.push(Dst::None);
}
}
self.instrs.push(Instr::new_split(&dsts, vec_src));
comp.into()
}
fn get_dst(&mut self, dst: &nir_def) -> Dst {
SSARef::try_from(self.get_ssa(dst)).unwrap().into()
}
fn get_phi_id(&mut self, phi: &nir_phi_instr) -> u32 {
match self.phis.get(&phi.def.index) {
Some(id) => *id,
None => {
let id = self.num_phis;
self.num_phis += 1;
self.phis.insert(phi.def.index, id);
id
}
}
}
fn split64(&mut self, ssa: SSAValue) -> [SSAValue; 2] {
assert!(ssa.comps() == 2);
let split =
[self.alloc_ssa(ssa.file(), 1), self.alloc_ssa(ssa.file(), 1)];
let dsts = [split[0].into(), split[1].into()];
self.instrs.push(Instr::new_split(&dsts, ssa.into()));
split
}
fn split(&mut self, ssa: SSAValue) -> Vec<SSAValue> {
if ssa.comps() == 1 {
return vec![ssa];
}
let mut split_ssa = Vec::new();
let mut dsts = Vec::new();
for c in 0..ssa.comps() {
let dst_ssa = self.alloc_ssa(ssa.file(), 1);
split_ssa.push(dst_ssa);
dsts.push(Dst::from(dst_ssa));
}
self.instrs.push(Instr::new_split(&dsts, ssa.into()));
split_ssa
}
fn vec(&mut self, ssa: &[SSAValue]) -> SSAValue {
let dst = self.alloc_ssa(ssa[0].file(), ssa.len().try_into().unwrap());
let srcs: Vec<Src> = ssa.iter().map(|s| (*s).into()).collect();
self.instrs.push(Instr::new_vec(dst.into(), &srcs));
dst
fn get_phi_id(&mut self, phi: &nir_phi_instr, comp: u8) -> u32 {
let ssa = phi.def.as_def();
*self.phi_map.entry((ssa.index, comp)).or_insert_with(|| {
let id = self.num_phis;
self.num_phis += 1;
id
})
}
fn parse_alu(&mut self, alu: &nir_alu_instr) {
let mut srcs = Vec::new();
for alu_src in alu.srcs_as_slice() {
srcs.push(self.get_alu_src(alu_src));
for (i, alu_src) in alu.srcs_as_slice().iter().enumerate() {
let bit_size = alu_src.src.bit_size();
let comps = alu.src_components(i.try_into().unwrap());
let alu_src_ssa = self.get_ssa(&alu_src.src.as_def());
let mut src_comps = Vec::new();
for c in 0..comps {
let s = usize::from(alu_src.swizzle[usize::from(c)]);
if bit_size == 1 || bit_size == 32 {
src_comps.push(alu_src_ssa[s]);
} else if bit_size == 64 {
src_comps.push(alu_src_ssa[s * 2]);
src_comps.push(alu_src_ssa[s * 2 + 1]);
} else {
panic!("Unhandled bit size");
}
}
srcs.push(Src::from(SSARef::try_from(src_comps).unwrap()));
}
/* Handle vectors as a special case since they're the only ALU ops that
* can produce more than 16B of data.
*/
match alu.op {
nir_op_mov | nir_op_vec2 | nir_op_vec3 | nir_op_vec4
| nir_op_vec5 | nir_op_vec8 | nir_op_vec16 => {
let mut pcopy = OpParCopy::new();
for src in srcs {
for v in src.as_ssa().unwrap().iter() {
pcopy.srcs.push((*v).into());
}
}
for v in self.get_ssa(&alu.def.as_def()) {
pcopy.dsts.push((*v).into());
}
assert!(pcopy.srcs.len() == pcopy.dsts.len());
self.instrs.push(Instr::new(Op::ParCopy(pcopy)));
return;
}
_ => (),
}
let srcs = srcs;
let dst = self.get_dst(&alu.def);
@ -293,7 +293,7 @@ impl<'a> ShaderFromNir<'a> {
})));
}
nir_op_fsign => {
let lz = self.alloc_ssa(RegFile::GPR, 1);
let lz = self.alloc_ssa(RegFile::GPR);
self.instrs.push(Instr::new_fset(
lz.into(),
FloatCmpOp::OrdLt,
@ -301,7 +301,7 @@ impl<'a> ShaderFromNir<'a> {
Src::new_zero(),
));
let gz = self.alloc_ssa(RegFile::GPR, 1);
let gz = self.alloc_ssa(RegFile::GPR);
self.instrs.push(Instr::new_fset(
gz.into(),
FloatCmpOp::OrdGt,
@ -332,15 +332,12 @@ impl<'a> ShaderFromNir<'a> {
})));
}
nir_op_iadd => {
if alu.def.bit_size() == 64 {
let x = self.split64(*srcs[0].as_ssa().unwrap());
let y = self.split64(*srcs[1].as_ssa().unwrap());
let carry = self.alloc_ssa(RegFile::Pred, 1);
if alu.def.bit_size == 64 {
let x = srcs[0].as_ssa().unwrap();
let y = srcs[1].as_ssa().unwrap();
let sum = dst.as_ssa().unwrap();
let carry = self.alloc_ssa(RegFile::Pred);
let sum = [
self.alloc_ssa(dst.as_ssa().unwrap().file(), 1),
self.alloc_ssa(dst.as_ssa().unwrap().file(), 1),
];
self.instrs.push(Instr::new(Op::IAdd3(OpIAdd3 {
dst: sum[0].into(),
overflow: carry.into(),
@ -353,9 +350,6 @@ impl<'a> ShaderFromNir<'a> {
srcs: [x[1].into(), y[1].into(), Src::new_zero()],
carry: carry.into(),
})));
let sum = [sum[0].into(), sum[1].into()];
self.instrs.push(Instr::new_vec(dst, &sum));
} else {
self.instrs.push(Instr::new_iadd(dst, srcs[0], srcs[1]));
}
@ -458,14 +452,13 @@ impl<'a> ShaderFromNir<'a> {
})));
}
nir_op_imul_high | nir_op_umul_high => {
let dst64 = self.alloc_ssa(RegFile::GPR, 2);
let dst_hi = dst.as_ssa().unwrap()[0];
let dst_lo = self.alloc_ssa(RegFile::GPR);
self.instrs.push(Instr::new(Op::IMad64(OpIMad64 {
dst: dst64.into(),
dst: [dst_lo, dst_hi].into(),
srcs: [srcs[0], srcs[1], Src::new_zero()],
signed: alu.op == nir_op_imul_high,
})));
self.instrs
.push(Instr::new_split(&[Dst::None, dst], dst64.into()));
}
nir_op_ineg => {
self.instrs.push(Instr::new(Op::IMov(OpIMov {
@ -539,7 +532,11 @@ impl<'a> ShaderFromNir<'a> {
self.instrs.push(Instr::new_mov(dst, srcs[0]));
}
nir_op_pack_64_2x32_split => {
self.instrs.push(Instr::new_vec(dst, &[srcs[0], srcs[1]]));
let dst_ssa = dst.as_ssa().unwrap();
let mut pcopy = OpParCopy::new();
pcopy.push(srcs[0], dst_ssa[0].into());
pcopy.push(srcs[1], dst_ssa[1].into());
self.instrs.push(Instr::new(Op::ParCopy(pcopy)));
}
nir_op_u2f32 => {
self.instrs.push(Instr::new_u2f(dst, srcs[0]));
@ -563,12 +560,12 @@ impl<'a> ShaderFromNir<'a> {
));
}
nir_op_unpack_64_2x32_split_x => {
self.instrs
.push(Instr::new_split(&[dst, Dst::None], srcs[0]));
let src0_x = srcs[0].as_ssa().unwrap()[0];
self.instrs.push(Instr::new_mov(dst, src0_x.into()));
}
nir_op_unpack_64_2x32_split_y => {
self.instrs
.push(Instr::new_split(&[Dst::None, dst], srcs[0]));
let src0_y = srcs[0].as_ssa().unwrap()[1];
self.instrs.push(Instr::new_mov(dst, src0_y.into()));
}
nir_op_ushr => {
self.instrs.push(Instr::new(Op::Shf(OpShf {
@ -582,9 +579,6 @@ impl<'a> ShaderFromNir<'a> {
dst_high: false,
})));
}
nir_op_vec2 | nir_op_vec3 | nir_op_vec4 => {
self.instrs.push(Instr::new_vec(dst, &srcs));
}
_ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
}
}
@ -644,24 +638,38 @@ impl<'a> ShaderFromNir<'a> {
let mask = tex.def.components_read();
let mask = u8::try_from(mask).unwrap();
let dst_comps = u8::try_from(mask.count_ones()).unwrap();
let tex_dst = *self.get_dst(&tex.def).as_ssa().unwrap();
let mut dst_comps = Vec::new();
for (i, comp) in tex_dst.iter().enumerate() {
if mask & (1 << i) == 0 {
self.instrs
.push(Instr::new_mov((*comp).into(), Src::new_zero()));
} else {
dst_comps.push(*comp);
}
}
let mut dsts = [Dst::None; 2];
dsts[0] = self.alloc_ssa(RegFile::GPR, min(dst_comps, 2)).into();
if dst_comps > 2 {
dsts[1] = self.alloc_ssa(RegFile::GPR, dst_comps - 2).into();
dsts[0] = SSARef::try_from(&dst_comps[..min(dst_comps.len(), 2)])
.unwrap()
.into();
if dst_comps.len() > 2 {
dsts[1] = SSARef::try_from(&dst_comps[2..]).unwrap().into();
}
if tex.op == nir_texop_hdr_dim_nv {
let src = self.get_src(&srcs[0].src);
self.instrs.push(Instr::new(Op::Txq(OpTxq {
dsts: dsts,
src: self.get_src(&srcs[0].src),
src: src,
query: TexQuery::Dimension,
mask: mask,
})));
} else if tex.op == nir_texop_tex_type_nv {
let src = self.get_src(&srcs[0].src);
self.instrs.push(Instr::new(Op::Txq(OpTxq {
dsts: dsts,
src: self.get_src(&srcs[0].src),
src: src,
query: TexQuery::TextureType,
mask: mask,
})));
@ -742,22 +750,6 @@ impl<'a> ShaderFromNir<'a> {
})));
}
}
let mut dst_comps = Vec::new();
for dst in dsts {
if let Dst::SSA(dst) = dst {
for comp in self.split(dst) {
dst_comps.push(comp.into());
}
}
}
for c in 0..tex.def.num_components() {
if mask & (1 << c) == 0 {
dst_comps.insert(c.into(), SrcRef::Zero.into());
}
}
self.instrs
.push(Instr::new_vec(self.get_dst(&tex.def), &dst_comps));
}
fn parse_intrinsic(&mut self, intrin: &nir_intrinsic_instr) {
@ -797,21 +789,16 @@ impl<'a> ShaderFromNir<'a> {
nir_intrinsic_load_barycentric_pixel => InterpLoc::Default,
_ => panic!("Unsupported interp mode"),
};
let dst = self.get_dst(&intrin.def);
let mut comps = Vec::new();
let dst = *self.get_dst(&intrin.def).as_ssa().unwrap();
for c in 0..intrin.num_components {
let tmp = self.alloc_ssa(RegFile::GPR, 1);
self.instrs.push(Instr::new(Op::Ipa(OpIpa {
dst: tmp.into(),
addr: addr + 4 * u16::try_from(c).unwrap(),
dst: dst[usize::from(c)].into(),
addr: addr + 4 * u16::from(c),
freq: freq,
loc: loc,
offset: SrcRef::Zero.into(),
})));
comps.push(tmp.into());
}
self.instrs.push(Instr::new_vec(dst, &comps));
}
nir_intrinsic_load_per_vertex_input => {
let addr = u16::try_from(intrin.base()).unwrap();
@ -828,21 +815,20 @@ impl<'a> ShaderFromNir<'a> {
nir_intrinsic_load_ubo => {
let idx = srcs[0];
let offset = srcs[1];
let dst = self.get_dst(&intrin.def);
let dwords =
(intrin.def.bit_size() / 32) * intrin.def.num_components();
let dst = *self.get_dst(&intrin.def).as_ssa().unwrap();
if let Some(imm_idx) = idx.as_uint() {
let imm_idx = u8::try_from(imm_idx).unwrap();
if let Some(imm_offset) = offset.as_uint() {
let imm_offset = u16::try_from(imm_offset).unwrap();
let mut srcs = Vec::new();
for i in 0..dwords {
srcs.push(Src::new_cbuf(
let mut pcopy = OpParCopy::new();
for (i, dst) in dst.iter().enumerate() {
let src = Src::new_cbuf(
imm_idx,
imm_offset + u16::from(i) * 4,
));
imm_offset + u16::try_from(i).unwrap() * 4,
);
pcopy.push(src, (*dst).into());
}
self.instrs.push(Instr::new_vec(dst, &srcs));
self.instrs.push(Instr::new(Op::ParCopy(pcopy)));
} else {
panic!("Indirect UBO offsets not yet supported");
}
@ -869,16 +855,13 @@ impl<'a> ShaderFromNir<'a> {
/* We assume these only ever happen in the last block.
* This is ensured by nir_lower_io_to_temporaries()
*/
let data = self.get_src(&srcs[0]);
let data = *self.get_src(&srcs[0]).as_ssa().unwrap();
assert!(srcs[1].is_zero());
let base: u8 = intrin.base().try_into().unwrap();
let mut dsts = Vec::new();
for c in 0..intrin.num_components {
let tmp = self.alloc_ssa(RegFile::GPR, 1);
self.fs_out_regs[(base + c) as usize] = tmp.into();
dsts.push(Dst::from(tmp));
self.fs_out_regs[usize::from(base + c)] =
data[usize::from(c)].into();
}
self.instrs.push(Instr::new_split(&dsts, data))
} else {
let data = self.get_src(&srcs[0]);
let vtx = Src::new_zero();
@ -895,23 +878,35 @@ impl<'a> ShaderFromNir<'a> {
}
fn parse_load_const(&mut self, load_const: &nir_load_const_instr) {
let dst = self.get_dst(&load_const.def);
let mut srcs = Vec::new();
fn src_for_u32(u: u32) -> Src {
if u == 0 {
Src::new_zero()
} else {
Src::new_imm_u32(u)
}
}
let mut pcopy = OpParCopy::new();
for c in 0..load_const.def.num_components {
if load_const.def.bit_size == 1 {
let imm_b1 = unsafe { load_const.values()[c as usize].b };
srcs.push(Src::new_imm_bool(imm_b1));
} else {
assert!(load_const.def.bit_size == 32);
pcopy.srcs.push(Src::new_imm_bool(imm_b1));
} else if load_const.def.bit_size == 32 {
let imm_u32 = unsafe { load_const.values()[c as usize].u32_ };
srcs.push(if imm_u32 == 0 {
Src::new_zero()
} else {
Src::new_imm_u32(imm_u32)
});
pcopy.srcs.push(src_for_u32(imm_u32));
} else if load_const.def.bit_size == 64 {
let imm_u64 = unsafe { load_const.values()[c as usize].u64_ };
pcopy.srcs.push(src_for_u32(imm_u64 as u32));
pcopy.srcs.push(src_for_u32((imm_u64 >> 32) as u32));
}
}
self.instrs.push(Instr::new_vec(dst, &srcs));
for sv in self.get_ssa(&load_const.def) {
pcopy.dsts.push((*sv).into());
}
assert!(pcopy.srcs.len() == pcopy.dsts.len());
self.instrs.push(Instr::new(Op::ParCopy(pcopy)));
}
fn parse_undef(&mut self, _ssa_undef: &nir_undef_instr) {
@ -919,16 +914,15 @@ impl<'a> ShaderFromNir<'a> {
}
fn parse_block(&mut self, nb: &nir_block) {
let mut phi = OpPhiDsts {
ids: Vec::new(),
dsts: Vec::new(),
};
let mut phi = OpPhiDsts::new();
for ni in nb.iter_instr_list() {
if ni.type_ == nir_instr_type_phi {
let np = ni.as_phi().unwrap();
phi.ids.push(self.get_phi_id(np));
phi.dsts.push(self.get_dst(&np.def));
let dst = *self.get_dst(&np.def).as_ssa().unwrap();
for (i, dst) in dst.iter().enumerate() {
let phi_id = self.get_phi_id(np, i.try_into().unwrap());
phi.push(phi_id, (*dst).into());
}
} else {
break;
}
@ -964,10 +958,7 @@ impl<'a> ShaderFromNir<'a> {
None => continue,
};
let mut phi = OpPhiSrcs {
srcs: Vec::new(),
ids: Vec::new(),
};
let mut phi = OpPhiSrcs::new();
for i in sb.iter_instr_list() {
let np = match i.as_phi() {
@ -977,8 +968,12 @@ impl<'a> ShaderFromNir<'a> {
for ps in np.iter_srcs() {
if ps.pred().index == nb.index {
phi.srcs.push(self.get_src(&ps.src));
phi.ids.push(self.get_phi_id(np));
let src = *self.get_src(&ps.src).as_ssa().unwrap();
for (i, src) in src.iter().enumerate() {
let phi_id =
self.get_phi_id(np, i.try_into().unwrap());
phi.push(phi_id, (*src).into());
}
break;
}
}
@ -1007,7 +1002,7 @@ impl<'a> ShaderFromNir<'a> {
}
fn parse_if(&mut self, ni: &nir_if) {
let cond = self.get_ssa(&ni.condition.as_def());
let cond = self.get_ssa(&ni.condition.as_def())[0];
let if_bra = self.blocks.last_mut().unwrap().branch_mut().unwrap();
if_bra.pred = cond.into();
@ -1040,7 +1035,7 @@ impl<'a> ShaderFromNir<'a> {
}
pub fn parse_function_impl(&mut self, nfi: &nir_function_impl) -> Function {
self.func = Some(Function::new(0, nfi.ssa_alloc));
self.func = Some(Function::new(0));
self.end_block_id = nfi.end_block().index;
self.parse_cf_list(nfi.iter_body());

View file

@ -8,7 +8,7 @@ extern crate nak_ir_proc;
use nak_ir_proc::*;
use std::fmt;
use std::iter::Zip;
use std::ops::{BitAnd, BitOr, Not, Range};
use std::ops::{BitAnd, BitOr, Deref, DerefMut, Not, Range};
use std::slice;
#[repr(u8)]
@ -111,32 +111,23 @@ pub struct SSAValue {
}
impl SSAValue {
pub fn new(file: RegFile, idx: u32, comps: u8) -> SSAValue {
assert!(idx < (1 << 27));
pub const NONE: Self = SSAValue { packed: 0 };
pub fn new(file: RegFile, idx: u32) -> SSAValue {
/* Reserve 2 numbers for use by SSARef::comps() */
assert!(idx > 0 && idx < (1 << 30) - 2);
let mut packed = idx;
assert!(comps > 0 && comps <= 8);
packed |= u32::from(comps - 1) << 27;
assert!(u8::from(file) < 4);
packed |= u32::from(u8::from(file)) << 30;
SSAValue { packed: packed }
}
pub fn idx(&self) -> u32 {
self.packed & 0x07ffffff
self.packed & 0x3fffffff
}
pub fn comps(&self) -> u8 {
(((self.packed >> 27) & 0x7) + 1).try_into().unwrap()
}
pub fn comp(&self, comp: u8) -> SSAComp {
assert!(comp < self.comps());
SSAComp::new(self.file(), self.idx(), comp)
}
pub fn as_comp(&self) -> SSAComp {
assert!(self.comps() == 1);
SSAComp::new(self.file(), self.idx(), 0)
pub fn is_none(&self) -> bool {
self.packed == 0
}
}
@ -148,38 +139,125 @@ impl HasRegFile for SSAValue {
impl fmt::Display for SSAValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.is_uniform() {
write!(f, "USSA{}@{}", self.idx(), self.comps())
} else {
write!(f, "SSA{}@{}", self.idx(), self.comps())
match self.file() {
RegFile::GPR => write!(f, "S")?,
RegFile::UGPR => write!(f, "US")?,
RegFile::Pred => write!(f, "PS")?,
RegFile::UPred => write!(f, "UPS")?,
}
write!(f, "{}", self.idx())
}
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct SSAComp {
v: SSAValue,
pub struct SSARef {
v: [SSAValue; 4],
}
impl SSAComp {
pub fn new(file: RegFile, idx: u32, comp: u8) -> SSAComp {
SSAComp {
v: SSAValue::new(file, idx, comp + 1),
impl SSARef {
#[inline]
fn new(comps: &[SSAValue]) -> SSARef {
assert!(comps.len() > 0 && comps.len() <= 4);
let mut r = SSARef {
v: [SSAValue::NONE; 4],
};
for i in 0..comps.len() {
r.v[i] = comps[i];
}
if comps.len() < 4 {
r.v[3].packed = (comps.len() as u32).wrapping_neg();
}
r
}
pub fn comps(&self) -> u8 {
if self.v[3].packed >= u32::MAX - 2 {
self.v[3].packed.wrapping_neg() as u8
} else {
4
}
}
}
pub fn idx(&self) -> u32 {
self.v.idx()
}
pub fn comp(&self) -> u8 {
self.v.comps() - 1
impl HasRegFile for SSARef {
fn file(&self) -> RegFile {
let comps = usize::from(self.comps());
for i in 1..comps {
assert!(self.v[i].file() == self.v[0].file());
}
self.v[0].file()
}
}
impl HasRegFile for SSAComp {
fn file(&self) -> RegFile {
self.v.file()
impl Deref for SSARef {
type Target = [SSAValue];
fn deref(&self) -> &[SSAValue] {
let comps = usize::from(self.comps());
&self.v[..comps]
}
}
impl DerefMut for SSARef {
fn deref_mut(&mut self) -> &mut [SSAValue] {
let comps = usize::from(self.comps());
&mut self.v[..comps]
}
}
impl TryFrom<&[SSAValue]> for SSARef {
type Error = &'static str;
fn try_from(comps: &[SSAValue]) -> Result<Self, Self::Error> {
if comps.len() == 0 {
Err("Empty vector")
} else if comps.len() > 4 {
Err("Too many vector components")
} else {
Ok(SSARef::new(comps))
}
}
}
impl TryFrom<Vec<SSAValue>> for SSARef {
type Error = &'static str;
fn try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error> {
SSARef::try_from(&comps[..])
}
}
macro_rules! impl_ssa_ref_from_arr {
($n: expr) => {
impl From<[SSAValue; $n]> for SSARef {
fn from(comps: [SSAValue; $n]) -> Self {
SSARef::new(&comps[..])
}
}
};
}
impl_ssa_ref_from_arr!(1);
impl_ssa_ref_from_arr!(2);
impl_ssa_ref_from_arr!(3);
impl_ssa_ref_from_arr!(4);
impl From<SSAValue> for SSARef {
fn from(val: SSAValue) -> Self {
[val].into()
}
}
impl fmt::Display for SSARef {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.comps() == 1 {
write!(f, "{}", self[0])
} else {
write!(f, "{{")?;
for v in self.iter() {
write!(f, " {}", v)?;
}
write!(f, " }}")
}
}
}
@ -188,20 +266,17 @@ pub struct SSAValueAllocator {
}
impl SSAValueAllocator {
pub fn new(initial_count: u32) -> SSAValueAllocator {
SSAValueAllocator {
count: initial_count,
}
pub fn new() -> SSAValueAllocator {
SSAValueAllocator { count: 0 }
}
pub fn count(&self) -> u32 {
self.count
}
pub fn alloc(&mut self, file: RegFile, comps: u8) -> SSAValue {
let idx = self.count;
pub fn alloc(&mut self, file: RegFile) -> SSAValue {
self.count += 1;
SSAValue::new(file, idx, comps)
SSAValue::new(file, self.count)
}
}
@ -282,7 +357,7 @@ impl fmt::Display for RegRef {
#[derive(Clone, Copy)]
pub enum Dst {
None,
SSA(SSAValue),
SSA(SSARef),
Reg(RegRef),
}
@ -294,7 +369,7 @@ impl Dst {
}
}
pub fn as_ssa(&self) -> Option<&SSAValue> {
pub fn as_ssa(&self) -> Option<&SSARef> {
match self {
Dst::SSA(r) => Some(r),
_ => None,
@ -308,9 +383,9 @@ impl From<RegRef> for Dst {
}
}
impl From<SSAValue> for Dst {
fn from(ssa: SSAValue) -> Dst {
Dst::SSA(ssa)
impl<T: Into<SSARef>> From<T> for Dst {
fn from(ssa: T) -> Dst {
Dst::SSA(ssa.into())
}
}
@ -345,7 +420,7 @@ pub enum SrcRef {
False,
Imm32(u32),
CBuf(CBufRef),
SSA(SSAValue),
SSA(SSARef),
Reg(RegRef),
}
@ -357,7 +432,7 @@ impl SrcRef {
}
}
pub fn as_ssa(&self) -> Option<&SSAValue> {
pub fn as_ssa(&self) -> Option<&SSARef> {
match self {
SrcRef::SSA(r) => Some(r),
_ => None,
@ -379,19 +454,20 @@ impl SrcRef {
}
}
pub fn get_ssa(&self) -> Option<&SSAValue> {
pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
match self {
SrcRef::Zero
| SrcRef::True
| SrcRef::False
| SrcRef::Imm32(_)
| SrcRef::Reg(_) => None,
| SrcRef::Reg(_) => &[],
SrcRef::CBuf(cb) => match &cb.buf {
CBuf::Binding(_) | CBuf::BindlessGPR(_) => None,
CBuf::BindlessSSA(ssa) => Some(ssa),
CBuf::Binding(_) | CBuf::BindlessGPR(_) => &[],
CBuf::BindlessSSA(ssa) => slice::from_ref(ssa),
},
SrcRef::SSA(ssa) => Some(ssa),
SrcRef::SSA(ssa) => ssa,
}
.iter()
}
}
@ -401,9 +477,9 @@ impl From<RegRef> for SrcRef {
}
}
impl From<SSAValue> for SrcRef {
fn from(ssa: SSAValue) -> SrcRef {
SrcRef::SSA(ssa)
impl<T: Into<SSARef>> From<T> for SrcRef {
fn from(ssa: T) -> SrcRef {
SrcRef::SSA(ssa.into())
}
}
@ -484,7 +560,7 @@ impl SrcMod {
}
}
pub fn abs(&self) -> SrcMod {
pub fn abs(self) -> SrcMod {
match self {
SrcMod::None | SrcMod::Abs | SrcMod::Neg | SrcMod::NegAbs => {
SrcMod::Abs
@ -493,7 +569,7 @@ impl SrcMod {
}
}
pub fn neg(&self) -> SrcMod {
pub fn neg(self) -> SrcMod {
match self {
SrcMod::None => SrcMod::Neg,
SrcMod::Abs => SrcMod::NegAbs,
@ -503,13 +579,23 @@ impl SrcMod {
}
}
pub fn not(&self) -> SrcMod {
pub fn not(self) -> SrcMod {
match self {
SrcMod::None => SrcMod::Not,
SrcMod::Not => SrcMod::None,
_ => panic!("Not a boolean source modifier"),
}
}
pub fn modify(self, other: SrcMod) -> SrcMod {
match other {
SrcMod::None => self,
SrcMod::Abs => self.abs(),
SrcMod::Neg => self.neg(),
SrcMod::NegAbs => self.abs().neg(),
SrcMod::Not => self.not(),
}
}
}
#[derive(Clone, Copy)]
@ -560,7 +646,7 @@ impl Src {
}
}
pub fn as_ssa(&self) -> Option<&SSAValue> {
pub fn as_ssa(&self) -> Option<&SSARef> {
if self.src_mod.is_none() {
self.src_ref.as_ssa()
} else {
@ -572,8 +658,8 @@ impl Src {
self.src_ref.get_reg()
}
pub fn get_ssa(&self) -> Option<&SSAValue> {
self.src_ref.get_ssa()
pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
self.src_ref.iter_ssa()
}
pub fn is_uniform(&self) -> bool {
@ -615,27 +701,15 @@ impl Src {
}
}
impl From<SrcRef> for Src {
fn from(src_ref: SrcRef) -> Src {
impl<T: Into<SrcRef>> From<T> for Src {
fn from(value: T) -> Src {
Src {
src_ref: src_ref,
src_ref: value.into(),
src_mod: SrcMod::None,
}
}
}
impl From<RegRef> for Src {
fn from(reg: RegRef) -> Src {
SrcRef::from(reg).into()
}
}
impl From<SSAValue> for Src {
fn from(ssa: SSAValue) -> Src {
SrcRef::from(ssa).into()
}
}
impl fmt::Display for Src {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.src_mod {
@ -1928,6 +2002,24 @@ impl fmt::Display for OpFMov {
}
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpDMov {
pub dst: Dst,
pub src: Src,
pub saturate: bool,
}
impl fmt::Display for OpDMov {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "DMOV")?;
if self.saturate {
write!(f, ".SAT")?;
}
write!(f, " {} {}", self.dst, self.src)
}
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMov {
@ -1941,60 +2033,6 @@ impl fmt::Display for OpIMov {
}
}
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpVec {
pub dst: Dst,
pub srcs: Vec<Src>,
}
impl SrcsAsSlice for OpVec {
fn srcs_as_slice(&self) -> &[Src] {
&self.srcs
}
fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
&mut self.srcs
}
}
impl fmt::Display for OpVec {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "VEC {} {{ {}", self.dst, self.srcs[0])?;
for src in &self.srcs[1..] {
write!(f, " {}", src)?;
}
write!(f, " }}")
}
}
#[repr(C)]
#[derive(SrcsAsSlice)]
pub struct OpSplit {
pub dsts: Vec<Dst>,
pub src: Src,
}
impl DstsAsSlice for OpSplit {
fn dsts_as_slice(&self) -> &[Dst] {
&self.dsts
}
fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
&mut self.dsts
}
}
impl fmt::Display for OpSplit {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "SPLIT {{ {}", self.dsts[0])?;
for dst in &self.dsts[1..] {
write!(f, " {}", dst)?;
}
write!(f, " }} {}", self.src)
}
}
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpPhiSrcs {
@ -2019,6 +2057,12 @@ impl OpPhiSrcs {
assert!(self.ids.len() == self.srcs.len());
self.ids.iter().zip(self.srcs.iter())
}
pub fn push(&mut self, id: u32, src: Src) {
assert!(self.ids.len() == self.srcs.len());
self.ids.push(id);
self.srcs.push(src);
}
}
impl SrcsAsSlice for OpPhiSrcs {
@ -2053,6 +2097,13 @@ pub struct OpPhiDsts {
}
impl OpPhiDsts {
pub fn new() -> OpPhiDsts {
OpPhiDsts {
ids: Vec::new(),
dsts: Vec::new(),
}
}
pub fn is_empty(&self) -> bool {
assert!(self.ids.len() == self.dsts.len());
self.ids.is_empty()
@ -2062,6 +2113,12 @@ impl OpPhiDsts {
assert!(self.ids.len() == self.dsts.len());
self.ids.iter().zip(self.dsts.iter())
}
pub fn push(&mut self, id: u32, dst: Dst) {
assert!(self.ids.len() == self.dsts.len());
self.ids.push(id);
self.dsts.push(dst);
}
}
impl DstsAsSlice for OpPhiDsts {
@ -2128,6 +2185,12 @@ impl OpParCopy {
assert!(self.srcs.len() == self.dsts.len());
self.srcs.iter().zip(&self.dsts)
}
pub fn push(&mut self, src: Src, dst: Dst) {
assert!(self.srcs.len() == self.dsts.len());
self.srcs.push(src);
self.dsts.push(dst);
}
}
impl SrcsAsSlice for OpParCopy {
@ -2226,11 +2289,10 @@ pub enum Op {
Exit(OpExit),
S2R(OpS2R),
FMov(OpFMov),
DMov(OpDMov),
IMov(OpIMov),
PhiSrcs(OpPhiSrcs),
PhiDsts(OpPhiDsts),
Vec(OpVec),
Split(OpSplit),
Swap(OpSwap),
ParCopy(OpParCopy),
FSOut(OpFSOut),
@ -2281,13 +2343,7 @@ impl fmt::Display for Pred {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Pred::None => (),
Pred::SSA(v) => {
if v.is_uniform() {
write!(f, "USSA{}@{}", v.idx(), v.comps())?;
} else {
write!(f, "SSA{}@{}", v.idx(), v.comps())?;
}
}
Pred::SSA(s) => s.fmt(f)?,
Pred::Reg(r) => r.fmt(f)?,
}
Ok(())
@ -2598,20 +2654,6 @@ impl Instr {
Instr::new(Op::S2R(OpS2R { dst: dst, idx: idx }))
}
pub fn new_vec(dst: Dst, srcs: &[Src]) -> Instr {
Instr::new(Op::Vec(OpVec {
dst: dst,
srcs: srcs.to_vec(),
}))
}
pub fn new_split(dsts: &[Dst], src: Src) -> Instr {
Instr::new(Op::Split(OpSplit {
dsts: dsts.to_vec(),
src: src,
}))
}
pub fn new_swap(x: RegRef, y: RegRef) -> Instr {
assert!(x.file() == y.file());
Instr::new(Op::Swap(OpSwap {
@ -2694,11 +2736,10 @@ impl Instr {
Op::St(_) => None,
Op::Bra(_) | Op::Exit(_) => Some(15),
Op::FMov(_)
| Op::DMov(_)
| Op::IMov(_)
| Op::PhiSrcs(_)
| Op::PhiDsts(_)
| Op::Vec(_)
| Op::Split(_)
| Op::Swap(_)
| Op::ParCopy(_)
| Op::FSOut(_) => {
@ -2796,10 +2837,10 @@ pub struct Function {
}
impl Function {
pub fn new(id: u32, reserved_ssa_count: u32) -> Function {
pub fn new(id: u32) -> Function {
Function {
id: id,
ssa_alloc: SSAValueAllocator::new(reserved_ssa_count),
ssa_alloc: SSAValueAllocator::new(),
blocks: Vec::new(),
}
}
@ -2864,46 +2905,6 @@ impl Shader {
carry: Src::new_imm_bool(false),
}))]
}
Op::Vec(vec) => {
let comps = u8::try_from(vec.srcs.len()).unwrap();
let vec_dst = vec.dst.as_reg().unwrap();
assert!(comps == vec_dst.comps());
let mut dsts = Vec::new();
for i in 0..comps {
dsts.push(Dst::from(vec_dst.as_comp(i).unwrap()));
}
vec![Instr::new(Op::ParCopy(OpParCopy {
srcs: vec.srcs,
dsts: dsts,
}))]
}
Op::Split(split) => {
let vec_src = split.src.src_ref.as_reg().unwrap();
assert!(usize::from(vec_src.comps()) == split.dsts.len());
let mut dsts = Vec::new();
let mut srcs = Vec::new();
for (i, dst) in split.dsts.iter().enumerate() {
let i = u8::try_from(i).unwrap();
let src = vec_src.as_comp(i).unwrap();
match dst {
Dst::None => continue,
Dst::Reg(reg) => {
if *reg == src {
continue;
}
}
_ => (),
}
dsts.push(*dst);
srcs.push(src.into());
}
vec![Instr::new(Op::ParCopy(OpParCopy {
srcs: srcs,
dsts: dsts,
}))]
}
Op::FSOut(out) => {
let mut pcopy = OpParCopy::new();
for (i, src) in out.srcs.iter().enumerate() {

View file

@ -52,7 +52,7 @@ impl<'a> LegalizeInstr<'a> {
}
pub fn mov_src(&mut self, src: &mut Src, file: RegFile) {
let val = self.ssa_alloc.alloc(file, 1);
let val = self.ssa_alloc.alloc(file);
self.instrs
.push(Instr::new_mov(val.into(), src.src_ref.into()));
src.src_ref = val.into();

View file

@ -47,14 +47,16 @@ impl BlockLiveness {
}
for src in instr.srcs() {
if let Some(val) = src.get_ssa() {
self.add_use(val, ip);
for sv in src.iter_ssa() {
self.add_use(sv, ip);
}
}
for dst in instr.dsts() {
if let Dst::SSA(val) = dst {
self.add_def(val);
if let Dst::SSA(sr) = dst {
for sv in sr.iter() {
self.add_def(sv);
}
}
}
}

View file

@ -538,6 +538,8 @@ nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak)
};
OPT(nir, nir_lower_mem_access_bit_sizes, &mem_bit_size_options);
nak_optimize_nir(nir, nak);
OPT(nir, nak_nir_lower_tex, nak);
OPT(nir, nir_lower_idiv, NULL);
OPT(nir, nir_lower_int64);

View file

@ -13,6 +13,7 @@ enum CopyType {
Raw,
Bits,
F32,
F64H,
I32,
}
@ -32,7 +33,7 @@ impl CopyEntry {
}
struct CopyPropPass {
ssa_map: HashMap<SSAValue, Vec<CopyEntry>>,
ssa_map: HashMap<SSAValue, CopyEntry>,
}
impl CopyPropPass {
@ -42,55 +43,46 @@ impl CopyPropPass {
}
}
fn add_copy(&mut self, dst: &SSAValue, typ: CopyType, src_vec: &[Src]) {
let entries = src_vec
.iter()
.map(|src| {
match typ {
CopyType::Raw => assert!(src.src_mod.is_none()),
CopyType::Bits => assert!(src.src_mod.is_bitwise()),
CopyType::F32 | CopyType::I32 => {
assert!(src.src_mod.is_alu())
}
}
CopyEntry {
typ: match src.src_mod {
SrcMod::None => CopyType::Raw,
SrcMod::Abs | SrcMod::Neg | SrcMod::NegAbs => {
assert!(
typ != CopyType::Raw && typ != CopyType::Bits
);
typ
}
SrcMod::Not => {
assert!(typ == CopyType::Bits);
typ
}
},
src: *src,
}
})
.collect();
self.ssa_map.insert(*dst, entries);
fn add_copy(&mut self, dst: SSAValue, typ: CopyType, src: Src) {
match typ {
CopyType::Raw => assert!(src.src_mod.is_none()),
CopyType::Bits => assert!(src.src_mod.is_bitwise()),
CopyType::F32 | CopyType::F64H | CopyType::I32 => {
assert!(src.src_mod.is_alu())
}
}
let typ = match src.src_mod {
SrcMod::None => CopyType::Raw,
SrcMod::Abs | SrcMod::Neg | SrcMod::NegAbs => {
assert!(typ != CopyType::Raw && typ != CopyType::Bits);
typ
}
SrcMod::Not => {
assert!(typ == CopyType::Bits);
typ
}
};
if let Some(ssa) = src.src_ref.as_ssa() {
assert!(ssa.comps() == 1);
}
self.ssa_map.insert(dst, CopyEntry { typ: typ, src: src });
}
fn add_copy_entry(&mut self, dst: &SSAValue, entry: CopyEntry) {
self.ssa_map.insert(*dst, vec![entry]);
}
fn get_copy(&mut self, dst: &SSAValue) -> Option<&Vec<CopyEntry>> {
fn get_copy(&mut self, dst: &SSAValue) -> Option<&CopyEntry> {
self.ssa_map.get(dst)
}
fn prop_to_pred(&mut self, pred: &mut Pred, pred_inv: &mut bool) {
if let Pred::SSA(src_ssa) = pred {
if let Some(src_vec) = self.get_copy(&src_ssa) {
let entry = &src_vec[0];
if let Some(entry) = self.get_copy(&src_ssa) {
if !entry.supports_type(CopyType::Bits) {
return;
}
*pred = Pred::SSA(*entry.src.src_ref.as_ssa().unwrap());
let copy_ssa = entry.src.src_ref.as_ssa().unwrap();
assert!(copy_ssa.comps() == 1 && copy_ssa.is_predicate());
*pred = Pred::SSA(copy_ssa[0]);
if entry.src.src_mod.has_not() {
*pred_inv = !*pred_inv;
}
@ -100,43 +92,63 @@ impl CopyPropPass {
fn prop_to_src(&mut self, src: &mut Src, src_typ: CopyType) -> bool {
if let SrcRef::SSA(src_ssa) = src.src_ref {
if src_ssa.comps() != 1 {
return false; /* TODO */
let mut found_copy = false;
let mut copy_mod = src.src_mod;
let mut copy_vals = [SSAValue::NONE; 4];
for c in 0..src_ssa.comps() {
let c_val = &src_ssa[usize::from(c)];
if let Some(entry) = self.get_copy(c_val) {
let c_typ = match src_typ {
CopyType::Raw => CopyType::Raw,
CopyType::Bits | CopyType::F32 | CopyType::I32 => {
assert!(src_ssa.comps() == 1);
src_typ
}
CopyType::F64H => {
assert!(src_ssa.comps() == 2);
/* The low bits of a 64-bit value are read raw */
if c == 0 {
CopyType::Raw
} else {
CopyType::F64H
}
}
};
if !entry.supports_type(c_typ) {
return false;
}
if c_typ != CopyType::Raw {
assert!(c == src_ssa.comps() - 1);
copy_mod = entry.src.src_mod.modify(src.src_mod);
}
if let Some(e_ssa) = entry.src.as_ssa() {
assert!(e_ssa.comps() == 1);
found_copy = true;
copy_vals[usize::from(c)] = e_ssa[0];
} else if src_ssa.comps() == 1 {
src.src_mod = copy_mod;
src.src_ref = entry.src.src_ref;
return true;
} else {
return false;
}
} else {
copy_vals[usize::from(c)] = *c_val;
}
}
if let Some(src_vec) = self.get_copy(&src_ssa) {
let entry = &src_vec[0];
if !entry.supports_type(src_typ) {
return false;
}
let mut new_src = entry.src;
match src_typ {
CopyType::Raw => {
assert!(src.src_mod.is_none());
}
CopyType::Bits => {
if src.src_mod.has_neg() {
new_src.src_mod = new_src.src_mod.neg();
}
}
CopyType::F32 | CopyType::I32 => {
if src.src_mod.has_abs() {
new_src.src_mod = new_src.src_mod.abs();
}
if src.src_mod.has_neg() {
new_src.src_mod = new_src.src_mod.neg();
}
}
}
*src = new_src;
true
} else {
false
if found_copy {
let comps = usize::from(src_ssa.comps());
let copy_ssa = SSARef::try_from(&copy_vals[..comps]).unwrap();
src.src_mod = copy_mod;
src.src_ref = copy_ssa.into();
return true;
}
} else {
false
}
false
}
fn prop_to_srcs(&mut self, srcs: &mut [Src], src_typ: CopyType) -> bool {
@ -151,40 +163,51 @@ impl CopyPropPass {
for b in &mut f.blocks {
for instr in &mut b.instrs {
match &instr.op {
Op::Mov(mov) => {
let dst = mov.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
if mov.quad_lanes == 0xf {
self.add_copy(dst[0], CopyType::Raw, mov.src);
}
}
Op::FMov(mov) => {
let dst = mov.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
if !mov.saturate {
self.add_copy(
mov.dst.as_ssa().unwrap(),
CopyType::F32,
slice::from_ref(&mov.src),
);
self.add_copy(dst[0], CopyType::F32, mov.src);
}
}
Op::DMov(mov) => {
let dst = mov.dst.as_ssa().unwrap();
assert!(dst.comps() == 2);
if !mov.saturate {
if let Some(src) = mov.src.src_ref.as_ssa() {
self.add_copy(
dst[0],
CopyType::Bits,
src[0].into(),
);
self.add_copy(
dst[1],
CopyType::F64H,
Src {
src_ref: src[1].into(),
src_mod: mov.src.src_mod,
},
);
}
}
}
Op::IMov(mov) => {
self.add_copy(
mov.dst.as_ssa().unwrap(),
CopyType::I32,
slice::from_ref(&mov.src),
);
let dst = mov.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
self.add_copy(dst[0], CopyType::I32, mov.src);
}
Op::Vec(vec) => {
self.add_copy(
vec.dst.as_ssa().unwrap(),
CopyType::Raw,
&vec.srcs,
);
}
Op::Split(split) => {
assert!(split.src.src_mod.is_none());
let src_ssa = split.src.src_ref.as_ssa().unwrap();
if let Some(src_vec) = self.get_copy(src_ssa) {
let mut src_vec = src_vec.clone();
assert!(src_vec.len() == split.dsts.len());
for (i, entry) in src_vec.drain(..).enumerate() {
if let Dst::SSA(ssa) = &split.dsts[i] {
self.add_copy_entry(ssa, entry);
}
}
Op::ParCopy(pcopy) => {
for (src, dst) in pcopy.iter() {
let dst = dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
self.add_copy(dst[0], CopyType::Raw, *src);
}
}
_ => (),

View file

@ -30,7 +30,9 @@ impl DeadCodePass {
fn mark_src_live(&mut self, src: &Src) {
if let SrcRef::SSA(ssa) = &src.src_ref {
self.mark_ssa_live(ssa);
for val in ssa.iter() {
self.mark_ssa_live(val);
}
}
}
@ -40,7 +42,14 @@ impl DeadCodePass {
fn is_dst_live(&self, dst: &Dst) -> bool {
match dst {
Dst::SSA(ssa) => self.live_ssa.get(ssa).is_some(),
Dst::SSA(ssa) => {
for val in ssa.iter() {
if self.live_ssa.get(val).is_some() {
return true;
}
}
false
}
Dst::None => false,
_ => panic!("Invalid SSA destination"),
}