diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs index 645aa6819fc..9fd4ae72270 100644 --- a/src/nouveau/compiler/nak/opt_copy_prop.rs +++ b/src/nouveau/compiler/nak/opt_copy_prop.rs @@ -5,12 +5,38 @@ use crate::ir::*; use std::collections::HashMap; +enum CBufRule { + Yes, + No, + BindlessRequiresBlock(usize), +} + +impl CBufRule { + fn allows_src(&self, src_bi: usize, src: &Src) -> bool { + let SrcRef::CBuf(cb) = &src.src_ref else { + return true; + }; + + match self { + CBufRule::Yes => true, + CBufRule::No => false, + CBufRule::BindlessRequiresBlock(bi) => match cb.buf { + CBuf::Binding(_) => true, + CBuf::BindlessSSA(_) => src_bi == *bi, + CBuf::BindlessUGPR(_) => panic!("Not in SSA form"), + }, + } + } +} + struct CopyEntry { + bi: usize, src_type: SrcType, src: Src, } struct PrmtEntry { + bi: usize, srcs: [Src; 2], selection: u16, } @@ -31,27 +57,45 @@ impl CopyPropPass { } } - fn add_copy(&mut self, dst: SSAValue, src_type: SrcType, src: Src) { + fn add_copy( + &mut self, + bi: usize, + dst: SSAValue, + src_type: SrcType, + src: Src, + ) { assert!(src.src_ref.get_reg().is_none()); self.ssa_map - .insert(dst, CopyPropEntry::Copy(CopyEntry { src_type, src })); + .insert(dst, CopyPropEntry::Copy(CopyEntry { bi, src_type, src })); } - fn add_prmt(&mut self, dst: SSAValue, srcs: [Src; 2], selection: u16) { + fn add_prmt( + &mut self, + bi: usize, + dst: SSAValue, + srcs: [Src; 2], + selection: u16, + ) { assert!( srcs[0].src_ref.get_reg().is_none() && srcs[1].src_ref.get_reg().is_none() ); - self.ssa_map - .insert(dst, CopyPropEntry::Prmt(PrmtEntry { srcs, selection })); + self.ssa_map.insert( + dst, + CopyPropEntry::Prmt(PrmtEntry { + bi, + srcs, + selection, + }), + ); } - fn add_fp64_copy(&mut self, dst: &SSARef, src: Src) { + fn add_fp64_copy(&mut self, bi: usize, dst: &SSARef, src: Src) { assert!(dst.comps() == 2); match src.src_ref { SrcRef::Zero | SrcRef::Imm32(_) => { - self.add_copy(dst[0], SrcType::ALU, Src::new_zero()); - self.add_copy(dst[1], SrcType::F64, src); + self.add_copy(bi, dst[0], SrcType::ALU, Src::new_zero()); + self.add_copy(bi, dst[1], SrcType::F64, src); } SrcRef::CBuf(cb) => { let lo32 = Src::from(SrcRef::CBuf(cb)); @@ -60,8 +104,8 @@ impl CopyPropPass { src_mod: src.src_mod, src_swizzle: src.src_swizzle, }; - self.add_copy(dst[0], SrcType::ALU, lo32); - self.add_copy(dst[1], SrcType::F64, hi32); + self.add_copy(bi, dst[0], SrcType::ALU, lo32); + self.add_copy(bi, dst[1], SrcType::F64, hi32); } SrcRef::SSA(ssa) => { assert!(ssa.comps() == 2); @@ -71,8 +115,8 @@ impl CopyPropPass { src_mod: src.src_mod, src_swizzle: src.src_swizzle, }; - self.add_copy(dst[0], SrcType::ALU, lo32); - self.add_copy(dst[1], SrcType::F64, hi32); + self.add_copy(bi, dst[0], SrcType::ALU, lo32); + self.add_copy(bi, dst[1], SrcType::F64, hi32); } _ => (), } @@ -185,7 +229,7 @@ impl CopyPropPass { fn prop_to_scalar_src( &self, src_type: SrcType, - allow_cbuf: bool, + cbuf_rule: &CBufRule, src: &mut Src, ) { loop { @@ -202,9 +246,7 @@ impl CopyPropPass { match entry { CopyPropEntry::Copy(entry) => { - if matches!(&entry.src.src_ref, SrcRef::CBuf(_)) - && !allow_cbuf - { + if !cbuf_rule.allows_src(entry.bi, &entry.src) { return; } @@ -259,9 +301,7 @@ impl CopyPropPass { let entry_src_idx = usize::from(entry_src_idx.unwrap()); let entry_src = entry.srcs[entry_src_idx]; - if matches!(&entry_src.src_ref, SrcRef::CBuf(_)) - && !allow_cbuf - { + if !cbuf_rule.allows_src(entry.bi, &entry_src) { return; } @@ -295,7 +335,7 @@ impl CopyPropPass { } } - fn prop_to_f64_src(&self, src: &mut Src) { + fn prop_to_f64_src(&self, cbuf_rule: &CBufRule, src: &mut Src) { loop { let src_ssa = match &mut src.src_ref { SrcRef::SSA(ssa) => ssa, @@ -349,6 +389,12 @@ impl CopyPropPass { return; } + if !cbuf_rule.allows_src(hi_entry.bi, &hi_entry.src) + || !cbuf_rule.allows_src(lo_entry.bi, &lo_entry.src) + { + return; + } + let new_src_ref = match hi_entry.src.src_ref { SrcRef::Zero => match lo_entry.src.src_ref { SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Zero, @@ -386,7 +432,12 @@ impl CopyPropPass { } } - fn prop_to_src(&self, src_type: SrcType, allow_cbuf: bool, src: &mut Src) { + fn prop_to_src( + &self, + src_type: SrcType, + cbuf_rule: &CBufRule, + src: &mut Src, + ) { match src_type { SrcType::SSA => { self.prop_to_ssa_src(src); @@ -401,17 +452,16 @@ impl CopyPropPass { | SrcType::I32 | SrcType::B32 | SrcType::Pred => { - self.prop_to_scalar_src(src_type, allow_cbuf, src); + self.prop_to_scalar_src(src_type, cbuf_rule, src); } SrcType::F64 => { - debug_assert!(allow_cbuf); - self.prop_to_f64_src(src); + self.prop_to_f64_src(cbuf_rule, src); } SrcType::Bar => (), } } - fn try_add_instr(&mut self, instr: &Instr) { + fn try_add_instr(&mut self, bi: usize, instr: &Instr) { match &instr.op { Op::HAdd2(add) => { let dst = add.dst.as_ssa().unwrap(); @@ -420,9 +470,9 @@ impl CopyPropPass { if !add.saturate { if add.srcs[0].is_fneg_zero(SrcType::F16v2) { - self.add_copy(dst, SrcType::F16v2, add.srcs[1]); + self.add_copy(bi, dst, SrcType::F16v2, add.srcs[1]); } else if add.srcs[1].is_fneg_zero(SrcType::F16v2) { - self.add_copy(dst, SrcType::F16v2, add.srcs[0]); + self.add_copy(bi, dst, SrcType::F16v2, add.srcs[0]); } } } @@ -433,18 +483,18 @@ impl CopyPropPass { if !add.saturate { if add.srcs[0].is_fneg_zero(SrcType::F32) { - self.add_copy(dst, SrcType::F32, add.srcs[1]); + self.add_copy(bi, dst, SrcType::F32, add.srcs[1]); } else if add.srcs[1].is_fneg_zero(SrcType::F32) { - self.add_copy(dst, SrcType::F32, add.srcs[0]); + self.add_copy(bi, dst, SrcType::F32, add.srcs[0]); } } } Op::DAdd(add) => { let dst = add.dst.as_ssa().unwrap(); if add.srcs[0].is_fneg_zero(SrcType::F64) { - self.add_fp64_copy(dst, add.srcs[1]); + self.add_fp64_copy(bi, dst, add.srcs[1]); } else if add.srcs[1].is_fneg_zero(SrcType::F64) { - self.add_fp64_copy(dst, add.srcs[0]); + self.add_fp64_copy(bi, dst, add.srcs[0]); } } Op::Lop3(lop) => { @@ -454,9 +504,10 @@ impl CopyPropPass { let op = lop.op; if op.lut == 0 { - self.add_copy(dst, SrcType::ALU, SrcRef::Zero.into()); + self.add_copy(bi, dst, SrcType::ALU, SrcRef::Zero.into()); } else if op.lut == !0 { self.add_copy( + bi, dst, SrcType::ALU, SrcRef::Imm32(u32::MAX).into(), @@ -464,7 +515,7 @@ impl CopyPropPass { } else { for s in 0..3 { if op.lut == LogicOp3::SRC_MASKS[s] { - self.add_copy(dst, SrcType::ALU, lop.srcs[s]); + self.add_copy(bi, dst, SrcType::ALU, lop.srcs[s]); } } } @@ -481,15 +532,31 @@ impl CopyPropPass { let op = lop.ops[i]; if op.lut == 0 { - self.add_copy(dst, SrcType::Pred, SrcRef::False.into()); + self.add_copy( + bi, + dst, + SrcType::Pred, + SrcRef::False.into(), + ); } else if op.lut == !0 { - self.add_copy(dst, SrcType::Pred, SrcRef::True.into()); + self.add_copy( + bi, + dst, + SrcType::Pred, + SrcRef::True.into(), + ); } else { for s in 0..3 { if op.lut == LogicOp3::SRC_MASKS[s] { - self.add_copy(dst, SrcType::Pred, lop.srcs[s]); + self.add_copy( + bi, + dst, + SrcType::Pred, + lop.srcs[s], + ); } else if op.lut == !LogicOp3::SRC_MASKS[s] { self.add_copy( + bi, dst, SrcType::Pred, lop.srcs[s].bnot(), @@ -506,9 +573,9 @@ impl CopyPropPass { if add.carry_in.is_zero() { if add.srcs[0].is_zero() { - self.add_copy(dst, SrcType::I32, add.srcs[1]); + self.add_copy(bi, dst, SrcType::I32, add.srcs[1]); } else if add.srcs[1].is_zero() { - self.add_copy(dst, SrcType::I32, add.srcs[0]); + self.add_copy(bi, dst, SrcType::I32, add.srcs[0]); } } } @@ -519,12 +586,12 @@ impl CopyPropPass { if add.srcs[0].is_zero() { if add.srcs[1].is_zero() { - self.add_copy(dst, SrcType::I32, add.srcs[2]); + self.add_copy(bi, dst, SrcType::I32, add.srcs[2]); } else if add.srcs[2].is_zero() { - self.add_copy(dst, SrcType::I32, add.srcs[1]); + self.add_copy(bi, dst, SrcType::I32, add.srcs[1]); } } else if add.srcs[1].is_zero() && add.srcs[2].is_zero() { - self.add_copy(dst, SrcType::I32, add.srcs[0]); + self.add_copy(bi, dst, SrcType::I32, add.srcs[0]); } } Op::Prmt(prmt) => { @@ -538,9 +605,9 @@ impl CopyPropPass { }; if sel == 0x3210 { - self.add_copy(dst[0], SrcType::GPR, prmt.srcs[0]); + self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]); } else if sel == 0x7654 { - self.add_copy(dst[0], SrcType::GPR, prmt.srcs[1]); + self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]); } else { let mut is_imm = true; let mut imm = 0_u32; @@ -559,9 +626,10 @@ impl CopyPropPass { } } if is_imm { - self.add_copy(dst[0], SrcType::GPR, imm.into()); + self.add_copy(bi, dst[0], SrcType::GPR, imm.into()); } else { self.add_prmt( + bi, dst[0], prmt.srcs, sel.try_into().unwrap(), @@ -574,19 +642,19 @@ impl CopyPropPass { if r2ur.src.is_uniform() { let dst = r2ur.dst.as_ssa().unwrap(); assert!(dst.comps() == 1); - self.add_copy(dst[0], SrcType::GPR, r2ur.src); + self.add_copy(bi, dst[0], SrcType::GPR, r2ur.src); } } Op::Copy(copy) => { let dst = copy.dst.as_ssa().unwrap(); assert!(dst.comps() == 1); - self.add_copy(dst[0], SrcType::GPR, copy.src); + self.add_copy(bi, dst[0], SrcType::GPR, copy.src); } Op::ParCopy(pcopy) => { for (dst, src) in pcopy.dsts_srcs.iter() { let dst = dst.as_ssa().unwrap(); assert!(dst.comps() == 1); - self.add_copy(dst[0], SrcType::GPR, *src); + self.add_copy(bi, dst[0], SrcType::GPR, *src); } } _ => (), @@ -594,25 +662,33 @@ impl CopyPropPass { } pub fn run(&mut self, f: &mut Function) { - for b in &mut f.blocks { + for (bi, b) in f.blocks.iter_mut().enumerate() { + let b_uniform = b.uniform; for instr in &mut b.instrs { - self.try_add_instr(instr); + self.try_add_instr(bi, instr); self.prop_to_pred(&mut instr.pred); - let allow_cbuf = !instr.is_uniform(); + let cbuf_rule = if instr.is_uniform() { + CBufRule::No + } else if !b_uniform { + CBufRule::BindlessRequiresBlock(bi) + } else { + CBufRule::Yes + }; match &mut instr.op { Op::IAdd2(add) => { // Carry-out interacts funny with SrcMod::INeg so we can // only propagate with modifiers if no carry is written. use SrcType::{ALU, I32}; + let [src0, src1] = &mut add.srcs; if add.carry_out.is_none() { - self.prop_to_src(I32, allow_cbuf, &mut add.srcs[0]); - self.prop_to_src(I32, allow_cbuf, &mut add.srcs[1]); + self.prop_to_src(I32, &cbuf_rule, src0); + self.prop_to_src(I32, &cbuf_rule, src1); } else { - self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[0]); - self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[1]); + self.prop_to_src(ALU, &cbuf_rule, src0); + self.prop_to_src(ALU, &cbuf_rule, src1); } } Op::IAdd3(add) => { @@ -620,23 +696,24 @@ impl CopyPropPass { // only propagate with modifiers if no overflow values // are written. use SrcType::{ALU, I32}; + let [src0, src1, src2] = &mut add.srcs; if add.overflow[0].is_none() && add.overflow[0].is_none() { - self.prop_to_src(I32, allow_cbuf, &mut add.srcs[0]); - self.prop_to_src(I32, allow_cbuf, &mut add.srcs[1]); - self.prop_to_src(I32, allow_cbuf, &mut add.srcs[2]); + self.prop_to_src(I32, &cbuf_rule, src0); + self.prop_to_src(I32, &cbuf_rule, src1); + self.prop_to_src(I32, &cbuf_rule, src2); } else { - self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[0]); - self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[1]); - self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[2]); + self.prop_to_src(ALU, &cbuf_rule, src0); + self.prop_to_src(ALU, &cbuf_rule, src1); + self.prop_to_src(ALU, &cbuf_rule, src2); } } _ => { let src_types = instr.src_types(); for (i, src) in instr.srcs_mut().iter_mut().enumerate() { - self.prop_to_src(src_types[i], allow_cbuf, src); + self.prop_to_src(src_types[i], &cbuf_rule, src); } } }