nak/copy_prop: Don't propagate bindless cbufs into non-uniform blocks

We can propagate within a non-uniform block just fine but not across
them because that might change live registers in unpredictable ways.
The real boundary here is that we can't propagate across an OpPin but
that's a lot harder to express.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
Faith Ekstrand 2024-06-11 10:14:28 -05:00 committed by Marge Bot
parent 06fc2d018e
commit ab84cf11c7

View file

@ -5,12 +5,38 @@ use crate::ir::*;
use std::collections::HashMap;
enum CBufRule {
Yes,
No,
BindlessRequiresBlock(usize),
}
impl CBufRule {
fn allows_src(&self, src_bi: usize, src: &Src) -> bool {
let SrcRef::CBuf(cb) = &src.src_ref else {
return true;
};
match self {
CBufRule::Yes => true,
CBufRule::No => false,
CBufRule::BindlessRequiresBlock(bi) => match cb.buf {
CBuf::Binding(_) => true,
CBuf::BindlessSSA(_) => src_bi == *bi,
CBuf::BindlessUGPR(_) => panic!("Not in SSA form"),
},
}
}
}
struct CopyEntry {
bi: usize,
src_type: SrcType,
src: Src,
}
struct PrmtEntry {
bi: usize,
srcs: [Src; 2],
selection: u16,
}
@ -31,27 +57,45 @@ impl CopyPropPass {
}
}
fn add_copy(&mut self, dst: SSAValue, src_type: SrcType, src: Src) {
fn add_copy(
&mut self,
bi: usize,
dst: SSAValue,
src_type: SrcType,
src: Src,
) {
assert!(src.src_ref.get_reg().is_none());
self.ssa_map
.insert(dst, CopyPropEntry::Copy(CopyEntry { src_type, src }));
.insert(dst, CopyPropEntry::Copy(CopyEntry { bi, src_type, src }));
}
fn add_prmt(&mut self, dst: SSAValue, srcs: [Src; 2], selection: u16) {
fn add_prmt(
&mut self,
bi: usize,
dst: SSAValue,
srcs: [Src; 2],
selection: u16,
) {
assert!(
srcs[0].src_ref.get_reg().is_none()
&& srcs[1].src_ref.get_reg().is_none()
);
self.ssa_map
.insert(dst, CopyPropEntry::Prmt(PrmtEntry { srcs, selection }));
self.ssa_map.insert(
dst,
CopyPropEntry::Prmt(PrmtEntry {
bi,
srcs,
selection,
}),
);
}
fn add_fp64_copy(&mut self, dst: &SSARef, src: Src) {
fn add_fp64_copy(&mut self, bi: usize, dst: &SSARef, src: Src) {
assert!(dst.comps() == 2);
match src.src_ref {
SrcRef::Zero | SrcRef::Imm32(_) => {
self.add_copy(dst[0], SrcType::ALU, Src::new_zero());
self.add_copy(dst[1], SrcType::F64, src);
self.add_copy(bi, dst[0], SrcType::ALU, Src::new_zero());
self.add_copy(bi, dst[1], SrcType::F64, src);
}
SrcRef::CBuf(cb) => {
let lo32 = Src::from(SrcRef::CBuf(cb));
@ -60,8 +104,8 @@ impl CopyPropPass {
src_mod: src.src_mod,
src_swizzle: src.src_swizzle,
};
self.add_copy(dst[0], SrcType::ALU, lo32);
self.add_copy(dst[1], SrcType::F64, hi32);
self.add_copy(bi, dst[0], SrcType::ALU, lo32);
self.add_copy(bi, dst[1], SrcType::F64, hi32);
}
SrcRef::SSA(ssa) => {
assert!(ssa.comps() == 2);
@ -71,8 +115,8 @@ impl CopyPropPass {
src_mod: src.src_mod,
src_swizzle: src.src_swizzle,
};
self.add_copy(dst[0], SrcType::ALU, lo32);
self.add_copy(dst[1], SrcType::F64, hi32);
self.add_copy(bi, dst[0], SrcType::ALU, lo32);
self.add_copy(bi, dst[1], SrcType::F64, hi32);
}
_ => (),
}
@ -185,7 +229,7 @@ impl CopyPropPass {
fn prop_to_scalar_src(
&self,
src_type: SrcType,
allow_cbuf: bool,
cbuf_rule: &CBufRule,
src: &mut Src,
) {
loop {
@ -202,9 +246,7 @@ impl CopyPropPass {
match entry {
CopyPropEntry::Copy(entry) => {
if matches!(&entry.src.src_ref, SrcRef::CBuf(_))
&& !allow_cbuf
{
if !cbuf_rule.allows_src(entry.bi, &entry.src) {
return;
}
@ -259,9 +301,7 @@ impl CopyPropPass {
let entry_src_idx = usize::from(entry_src_idx.unwrap());
let entry_src = entry.srcs[entry_src_idx];
if matches!(&entry_src.src_ref, SrcRef::CBuf(_))
&& !allow_cbuf
{
if !cbuf_rule.allows_src(entry.bi, &entry_src) {
return;
}
@ -295,7 +335,7 @@ impl CopyPropPass {
}
}
fn prop_to_f64_src(&self, src: &mut Src) {
fn prop_to_f64_src(&self, cbuf_rule: &CBufRule, src: &mut Src) {
loop {
let src_ssa = match &mut src.src_ref {
SrcRef::SSA(ssa) => ssa,
@ -349,6 +389,12 @@ impl CopyPropPass {
return;
}
if !cbuf_rule.allows_src(hi_entry.bi, &hi_entry.src)
|| !cbuf_rule.allows_src(lo_entry.bi, &lo_entry.src)
{
return;
}
let new_src_ref = match hi_entry.src.src_ref {
SrcRef::Zero => match lo_entry.src.src_ref {
SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Zero,
@ -386,7 +432,12 @@ impl CopyPropPass {
}
}
fn prop_to_src(&self, src_type: SrcType, allow_cbuf: bool, src: &mut Src) {
fn prop_to_src(
&self,
src_type: SrcType,
cbuf_rule: &CBufRule,
src: &mut Src,
) {
match src_type {
SrcType::SSA => {
self.prop_to_ssa_src(src);
@ -401,17 +452,16 @@ impl CopyPropPass {
| SrcType::I32
| SrcType::B32
| SrcType::Pred => {
self.prop_to_scalar_src(src_type, allow_cbuf, src);
self.prop_to_scalar_src(src_type, cbuf_rule, src);
}
SrcType::F64 => {
debug_assert!(allow_cbuf);
self.prop_to_f64_src(src);
self.prop_to_f64_src(cbuf_rule, src);
}
SrcType::Bar => (),
}
}
fn try_add_instr(&mut self, instr: &Instr) {
fn try_add_instr(&mut self, bi: usize, instr: &Instr) {
match &instr.op {
Op::HAdd2(add) => {
let dst = add.dst.as_ssa().unwrap();
@ -420,9 +470,9 @@ impl CopyPropPass {
if !add.saturate {
if add.srcs[0].is_fneg_zero(SrcType::F16v2) {
self.add_copy(dst, SrcType::F16v2, add.srcs[1]);
self.add_copy(bi, dst, SrcType::F16v2, add.srcs[1]);
} else if add.srcs[1].is_fneg_zero(SrcType::F16v2) {
self.add_copy(dst, SrcType::F16v2, add.srcs[0]);
self.add_copy(bi, dst, SrcType::F16v2, add.srcs[0]);
}
}
}
@ -433,18 +483,18 @@ impl CopyPropPass {
if !add.saturate {
if add.srcs[0].is_fneg_zero(SrcType::F32) {
self.add_copy(dst, SrcType::F32, add.srcs[1]);
self.add_copy(bi, dst, SrcType::F32, add.srcs[1]);
} else if add.srcs[1].is_fneg_zero(SrcType::F32) {
self.add_copy(dst, SrcType::F32, add.srcs[0]);
self.add_copy(bi, dst, SrcType::F32, add.srcs[0]);
}
}
}
Op::DAdd(add) => {
let dst = add.dst.as_ssa().unwrap();
if add.srcs[0].is_fneg_zero(SrcType::F64) {
self.add_fp64_copy(dst, add.srcs[1]);
self.add_fp64_copy(bi, dst, add.srcs[1]);
} else if add.srcs[1].is_fneg_zero(SrcType::F64) {
self.add_fp64_copy(dst, add.srcs[0]);
self.add_fp64_copy(bi, dst, add.srcs[0]);
}
}
Op::Lop3(lop) => {
@ -454,9 +504,10 @@ impl CopyPropPass {
let op = lop.op;
if op.lut == 0 {
self.add_copy(dst, SrcType::ALU, SrcRef::Zero.into());
self.add_copy(bi, dst, SrcType::ALU, SrcRef::Zero.into());
} else if op.lut == !0 {
self.add_copy(
bi,
dst,
SrcType::ALU,
SrcRef::Imm32(u32::MAX).into(),
@ -464,7 +515,7 @@ impl CopyPropPass {
} else {
for s in 0..3 {
if op.lut == LogicOp3::SRC_MASKS[s] {
self.add_copy(dst, SrcType::ALU, lop.srcs[s]);
self.add_copy(bi, dst, SrcType::ALU, lop.srcs[s]);
}
}
}
@ -481,15 +532,31 @@ impl CopyPropPass {
let op = lop.ops[i];
if op.lut == 0 {
self.add_copy(dst, SrcType::Pred, SrcRef::False.into());
self.add_copy(
bi,
dst,
SrcType::Pred,
SrcRef::False.into(),
);
} else if op.lut == !0 {
self.add_copy(dst, SrcType::Pred, SrcRef::True.into());
self.add_copy(
bi,
dst,
SrcType::Pred,
SrcRef::True.into(),
);
} else {
for s in 0..3 {
if op.lut == LogicOp3::SRC_MASKS[s] {
self.add_copy(dst, SrcType::Pred, lop.srcs[s]);
self.add_copy(
bi,
dst,
SrcType::Pred,
lop.srcs[s],
);
} else if op.lut == !LogicOp3::SRC_MASKS[s] {
self.add_copy(
bi,
dst,
SrcType::Pred,
lop.srcs[s].bnot(),
@ -506,9 +573,9 @@ impl CopyPropPass {
if add.carry_in.is_zero() {
if add.srcs[0].is_zero() {
self.add_copy(dst, SrcType::I32, add.srcs[1]);
self.add_copy(bi, dst, SrcType::I32, add.srcs[1]);
} else if add.srcs[1].is_zero() {
self.add_copy(dst, SrcType::I32, add.srcs[0]);
self.add_copy(bi, dst, SrcType::I32, add.srcs[0]);
}
}
}
@ -519,12 +586,12 @@ impl CopyPropPass {
if add.srcs[0].is_zero() {
if add.srcs[1].is_zero() {
self.add_copy(dst, SrcType::I32, add.srcs[2]);
self.add_copy(bi, dst, SrcType::I32, add.srcs[2]);
} else if add.srcs[2].is_zero() {
self.add_copy(dst, SrcType::I32, add.srcs[1]);
self.add_copy(bi, dst, SrcType::I32, add.srcs[1]);
}
} else if add.srcs[1].is_zero() && add.srcs[2].is_zero() {
self.add_copy(dst, SrcType::I32, add.srcs[0]);
self.add_copy(bi, dst, SrcType::I32, add.srcs[0]);
}
}
Op::Prmt(prmt) => {
@ -538,9 +605,9 @@ impl CopyPropPass {
};
if sel == 0x3210 {
self.add_copy(dst[0], SrcType::GPR, prmt.srcs[0]);
self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]);
} else if sel == 0x7654 {
self.add_copy(dst[0], SrcType::GPR, prmt.srcs[1]);
self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]);
} else {
let mut is_imm = true;
let mut imm = 0_u32;
@ -559,9 +626,10 @@ impl CopyPropPass {
}
}
if is_imm {
self.add_copy(dst[0], SrcType::GPR, imm.into());
self.add_copy(bi, dst[0], SrcType::GPR, imm.into());
} else {
self.add_prmt(
bi,
dst[0],
prmt.srcs,
sel.try_into().unwrap(),
@ -574,19 +642,19 @@ impl CopyPropPass {
if r2ur.src.is_uniform() {
let dst = r2ur.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
self.add_copy(dst[0], SrcType::GPR, r2ur.src);
self.add_copy(bi, dst[0], SrcType::GPR, r2ur.src);
}
}
Op::Copy(copy) => {
let dst = copy.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
self.add_copy(dst[0], SrcType::GPR, copy.src);
self.add_copy(bi, dst[0], SrcType::GPR, copy.src);
}
Op::ParCopy(pcopy) => {
for (dst, src) in pcopy.dsts_srcs.iter() {
let dst = dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
self.add_copy(dst[0], SrcType::GPR, *src);
self.add_copy(bi, dst[0], SrcType::GPR, *src);
}
}
_ => (),
@ -594,25 +662,33 @@ impl CopyPropPass {
}
pub fn run(&mut self, f: &mut Function) {
for b in &mut f.blocks {
for (bi, b) in f.blocks.iter_mut().enumerate() {
let b_uniform = b.uniform;
for instr in &mut b.instrs {
self.try_add_instr(instr);
self.try_add_instr(bi, instr);
self.prop_to_pred(&mut instr.pred);
let allow_cbuf = !instr.is_uniform();
let cbuf_rule = if instr.is_uniform() {
CBufRule::No
} else if !b_uniform {
CBufRule::BindlessRequiresBlock(bi)
} else {
CBufRule::Yes
};
match &mut instr.op {
Op::IAdd2(add) => {
// Carry-out interacts funny with SrcMod::INeg so we can
// only propagate with modifiers if no carry is written.
use SrcType::{ALU, I32};
let [src0, src1] = &mut add.srcs;
if add.carry_out.is_none() {
self.prop_to_src(I32, allow_cbuf, &mut add.srcs[0]);
self.prop_to_src(I32, allow_cbuf, &mut add.srcs[1]);
self.prop_to_src(I32, &cbuf_rule, src0);
self.prop_to_src(I32, &cbuf_rule, src1);
} else {
self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[0]);
self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[1]);
self.prop_to_src(ALU, &cbuf_rule, src0);
self.prop_to_src(ALU, &cbuf_rule, src1);
}
}
Op::IAdd3(add) => {
@ -620,23 +696,24 @@ impl CopyPropPass {
// only propagate with modifiers if no overflow values
// are written.
use SrcType::{ALU, I32};
let [src0, src1, src2] = &mut add.srcs;
if add.overflow[0].is_none()
&& add.overflow[0].is_none()
{
self.prop_to_src(I32, allow_cbuf, &mut add.srcs[0]);
self.prop_to_src(I32, allow_cbuf, &mut add.srcs[1]);
self.prop_to_src(I32, allow_cbuf, &mut add.srcs[2]);
self.prop_to_src(I32, &cbuf_rule, src0);
self.prop_to_src(I32, &cbuf_rule, src1);
self.prop_to_src(I32, &cbuf_rule, src2);
} else {
self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[0]);
self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[1]);
self.prop_to_src(ALU, allow_cbuf, &mut add.srcs[2]);
self.prop_to_src(ALU, &cbuf_rule, src0);
self.prop_to_src(ALU, &cbuf_rule, src1);
self.prop_to_src(ALU, &cbuf_rule, src2);
}
}
_ => {
let src_types = instr.src_types();
for (i, src) in instr.srcs_mut().iter_mut().enumerate()
{
self.prop_to_src(src_types[i], allow_cbuf, src);
self.prop_to_src(src_types[i], &cbuf_rule, src);
}
}
}