nak: CBuf and SSARef are no longer Copy

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34794>
This commit is contained in:
Mel Henning 2025-04-30 20:35:23 -04:00 committed by Marge Bot
parent 68069fb810
commit dee3a0aa58
17 changed files with 140 additions and 129 deletions

View file

@ -92,14 +92,14 @@ impl SSAUseMap {
v.push((ip, SSAUse::FixedReg(reg)));
}
fn add_vec_use(&mut self, ip: usize, vec: SSARef) {
fn add_vec_use(&mut self, ip: usize, vec: &SSARef) {
if vec.comps() == 1 {
return;
}
for ssa in vec.iter() {
let v = self.ssa_map.entry(*ssa).or_default();
v.push((ip, SSAUse::Vec(vec)));
v.push((ip, SSAUse::Vec(vec.clone())));
}
}
@ -133,7 +133,7 @@ impl SSAUseMap {
// We don't care about predicates because they're scalar
for src in instr.srcs() {
if let Some(ssa) = src_ssa_ref(src) {
self.add_vec_use(ip, *ssa);
self.add_vec_use(ip, ssa);
}
}
}
@ -530,7 +530,7 @@ impl<'a> VecRegAllocator<'a> {
RegRef::new(self.file(), reg, 1)
}
pub fn assign_pin_vec_reg(&mut self, vec: SSARef, reg: u32) -> RegRef {
pub fn assign_pin_vec_reg(&mut self, vec: &SSARef, reg: u32) -> RegRef {
for c in 0..vec.comps() {
let ssa = vec[usize::from(c)];
self.assign_pin_reg(ssa, reg + u32::from(c));
@ -675,12 +675,12 @@ impl<'a> VecRegAllocator<'a> {
RegRef::new(self.file(), reg, comps)
}
pub fn alloc_vector(&mut self, vec: SSARef) -> RegRef {
pub fn alloc_vector(&mut self, vec: &SSARef) -> RegRef {
let comps = vec.comps();
let align = u32::from(comps).next_power_of_two();
if let Some(reg) = self.ra.try_find_unused_reg_range(0, align, comps) {
return self.assign_pin_vec_reg(vec, reg);
return self.assign_pin_vec_reg(&vec, reg);
}
let reg = self
@ -690,7 +690,7 @@ impl<'a> VecRegAllocator<'a> {
for c in 0..comps {
self.evict_reg_if_used(reg + u32::from(c));
}
self.assign_pin_vec_reg(vec, reg)
self.assign_pin_vec_reg(&vec, reg)
}
pub fn free_killed(&mut self, killed: &KillSet) {
@ -817,7 +817,7 @@ fn instr_assign_regs_file(
for ssa in vec.iter() {
avail.remove(ssa);
}
killed_vecs.push(*vec);
killed_vecs.push(vec.clone());
}
}
}
@ -865,7 +865,7 @@ fn instr_assign_regs_file(
for vec_dst in vec_dsts {
let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
*dst = vra
.assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
.assign_pin_vec_reg(dst.as_ssa().unwrap(), vec_dst.reg)
.into();
}
@ -877,7 +877,7 @@ fn instr_assign_regs_file(
for vec_dst in vec_dsts {
let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
*dst = vra
.assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
.assign_pin_vec_reg(dst.as_ssa().unwrap(), vec_dst.reg)
.into();
}
@ -894,7 +894,7 @@ fn instr_assign_regs_file(
for dst in instr.dsts_mut() {
if let Dst::SSA(ssa) = dst {
if ssa.file().unwrap() == vra.file() && ssa.comps() > 1 {
*dst = vra.alloc_vector(*ssa).into();
*dst = vra.alloc_vector(ssa).into();
}
}
}
@ -1008,7 +1008,7 @@ impl AssignRegsBlock {
) -> Option<Box<Instr>> {
match &mut instr.op {
Op::Undef(undef) => {
if let Dst::SSA(ssa) = undef.dst {
if let Dst::SSA(ssa) = &undef.dst {
assert!(ssa.comps() == 1);
self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
}
@ -1164,7 +1164,7 @@ impl AssignRegsBlock {
let dst_ra = &mut self.ra[dst_vec.file().unwrap()];
let mut vra = VecRegAllocator::new(dst_ra);
let dst_reg = vra.alloc_vector(*dst_vec);
let dst_reg = vra.alloc_vector(dst_vec);
vra.finish(pcopy);
let mut pin_copy = OpParCopy::new();

View file

@ -23,7 +23,7 @@ pub trait Builder {
}
fn lop2_to(&mut self, dst: Dst, op: LogicOp2, mut x: Src, mut y: Src) {
let is_predicate = match dst {
let is_predicate = match &dst {
Dst::None => panic!("No LOP destination"),
Dst::SSA(ssa) => ssa.is_predicate(),
Dst::Reg(reg) => reg.is_predicate(),
@ -462,7 +462,7 @@ pub trait SSABuilder: Builder {
let dst = self.alloc_ssa_vec(RegFile::GPR, 2);
if self.sm() >= 70 {
self.push_op(OpIMad64 {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [x, y, 0.into()],
signed,
});

View file

@ -813,7 +813,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, dst_bits.div_ceil(32));
b.push_op(OpF2F {
dst: dst.into(),
dst: dst.clone().into(),
src: srcs(0),
src_type: FloatType::from_bits(src_bits.into()),
dst_type: dst_type,
@ -865,7 +865,7 @@ impl<'a> ShaderFromNir<'a> {
ftz: self.float_ctl[src_type].ftz,
});
b.push_op(OpI2I {
dst: dst.into(),
dst: dst.clone().into(),
src: tmp.into(),
src_type: tmp_type,
dst_type,
@ -875,7 +875,7 @@ impl<'a> ShaderFromNir<'a> {
});
} else {
b.push_op(OpF2I {
dst: dst.into(),
dst: dst.clone().into(),
src: srcs(0),
src_type,
dst_type,
@ -897,14 +897,14 @@ impl<'a> ShaderFromNir<'a> {
if alu.def.bit_size() == 64 {
dst = b.alloc_ssa_vec(RegFile::GPR, 2);
b.push_op(OpDAdd {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [x, y],
rnd_mode: self.float_ctl[ftype].rnd_mode,
});
} else if alu.def.bit_size() == 32 {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpFAdd {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [x, y],
saturate: self.try_saturate_alu_dst(&alu.def),
rnd_mode: self.float_ctl[ftype].rnd_mode,
@ -917,7 +917,7 @@ impl<'a> ShaderFromNir<'a> {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpHAdd2 {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [restrict_f16v2_src(x), restrict_f16v2_src(y)],
saturate: self.try_saturate_alu_dst(&alu.def),
ftz: self.float_ctl[ftype].ftz,
@ -984,7 +984,7 @@ impl<'a> ShaderFromNir<'a> {
if alu.get_src(0).bit_size() == 64 {
assert!(alu.def.num_components == 1);
b.push_op(OpDSetP {
dst: dst.into(),
dst: dst.clone().into(),
set_op: PredSetOp::And,
cmp_op: cmp_op,
srcs: [srcs(0), srcs(1)],
@ -993,7 +993,7 @@ impl<'a> ShaderFromNir<'a> {
} else if alu.get_src(0).bit_size() == 32 {
assert!(alu.def.num_components == 1);
b.push_op(OpFSetP {
dst: dst.into(),
dst: dst.clone().into(),
set_op: PredSetOp::And,
cmp_op: cmp_op,
srcs: [srcs(0), srcs(1)],
@ -1040,14 +1040,14 @@ impl<'a> ShaderFromNir<'a> {
debug_assert!(!self.float_ctl[ftype].ftz);
dst = b.alloc_ssa_vec(RegFile::GPR, 2);
b.push_op(OpDFma {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [srcs(0), srcs(1), srcs(2)],
rnd_mode: self.float_ctl[ftype].rnd_mode,
});
} else if alu.def.bit_size() == 32 {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpFFma {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [srcs(0), srcs(1), srcs(2)],
saturate: self.try_saturate_alu_dst(&alu.def),
rnd_mode: self.float_ctl[ftype].rnd_mode,
@ -1063,7 +1063,7 @@ impl<'a> ShaderFromNir<'a> {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpHFma2 {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [
restrict_f16v2_src(srcs(0)),
restrict_f16v2_src(srcs(1)),
@ -1105,14 +1105,14 @@ impl<'a> ShaderFromNir<'a> {
if alu.def.bit_size() == 64 {
dst = b.alloc_ssa_vec(RegFile::GPR, 2);
b.push_op(OpDMnMx {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [srcs(0), srcs(1)],
min: (alu.op == nir_op_fmin).into(),
});
} else if alu.def.bit_size() == 32 {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpFMnMx {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [srcs(0), srcs(1)],
min: (alu.op == nir_op_fmin).into(),
ftz: self.float_ctl.fp32.ftz,
@ -1120,7 +1120,7 @@ impl<'a> ShaderFromNir<'a> {
} else if alu.def.bit_size() == 16 {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpHMnMx2 {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [
restrict_f16v2_src(srcs(0)),
restrict_f16v2_src(srcs(1)),
@ -1140,14 +1140,14 @@ impl<'a> ShaderFromNir<'a> {
debug_assert!(!self.float_ctl[ftype].ftz);
dst = b.alloc_ssa_vec(RegFile::GPR, 2);
b.push_op(OpDMul {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [srcs(0), srcs(1)],
rnd_mode: self.float_ctl[ftype].rnd_mode,
});
} else if alu.def.bit_size() == 32 {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpFMul {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [srcs(0), srcs(1)],
saturate: self.try_saturate_alu_dst(&alu.def),
rnd_mode: self.float_ctl[ftype].rnd_mode,
@ -1161,7 +1161,7 @@ impl<'a> ShaderFromNir<'a> {
dst = b.alloc_ssa_vec(RegFile::GPR, 1);
b.push_op(OpHMul2 {
dst: dst.into(),
dst: dst.clone().into(),
srcs: [
restrict_f16v2_src(srcs(0)),
restrict_f16v2_src(srcs(1)),
@ -1310,7 +1310,7 @@ impl<'a> ShaderFromNir<'a> {
let dst_type = FloatType::from_bits(dst_bits.into());
let dst = b.alloc_ssa_vec(RegFile::GPR, dst_bits.div_ceil(32));
b.push_op(OpI2F {
dst: dst.into(),
dst: dst.clone().into(),
src: srcs(0),
dst_type: dst_type,
src_type: IntType::from_bits(src_bits.into(), true),
@ -1411,8 +1411,8 @@ impl<'a> ShaderFromNir<'a> {
dst.into()
}
nir_op_ige | nir_op_ilt | nir_op_uge | nir_op_ult => {
let x = *srcs(0).as_ssa().unwrap();
let y = *srcs(1).as_ssa().unwrap();
let x = srcs(0).to_ssa();
let y = srcs(1).to_ssa();
let (cmp_type, cmp_op) = match alu.op {
nir_op_ige => (IntCmpType::I32, IntCmpOp::Ge),
nir_op_ilt => (IntCmpType::I32, IntCmpOp::Lt),
@ -1624,7 +1624,7 @@ impl<'a> ShaderFromNir<'a> {
let dst_type = FloatType::from_bits(dst_bits.into());
let dst = b.alloc_ssa_vec(RegFile::GPR, dst_bits.div_ceil(32));
b.push_op(OpI2F {
dst: dst.into(),
dst: dst.clone().into(),
src: srcs(0),
dst_type: dst_type,
src_type: IntType::from_bits(src_bits.into(), false),
@ -1827,7 +1827,7 @@ impl<'a> ShaderFromNir<'a> {
dsts[0] = SSARef::try_from(&dst[0..2]).unwrap().into();
dsts[1] = SSARef::try_from(&dst[2..]).unwrap().into();
} else {
dsts[0] = dst.into();
dsts[0] = dst.clone().into();
}
let fault = if flags.is_sparse() {
@ -2164,7 +2164,7 @@ impl<'a> ShaderFromNir<'a> {
assert!(intrin.def.bit_size() == 32);
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
b.push_op(OpALd {
dst: dst.into(),
dst: dst.clone().into(),
vtx,
addr,
offset,
@ -2233,7 +2233,7 @@ impl<'a> ShaderFromNir<'a> {
src_base_type == ALUType::INT,
);
b.push_op(OpI2I {
dst: dst.into(),
dst: dst.clone().into(),
src: self.get_src(&srcs[0]),
src_type,
dst_type,
@ -2249,7 +2249,7 @@ impl<'a> ShaderFromNir<'a> {
// pre-Volta
assert!(b.sm() >= 70 || dst_bit_size > 8);
b.push_op(OpF2I {
dst: dst.into(),
dst: dst.clone().into(),
src: self.get_src(&srcs[0]),
src_type,
dst_type,
@ -2270,7 +2270,7 @@ impl<'a> ShaderFromNir<'a> {
src_base_type == ALUType::INT,
);
b.push_op(OpI2F {
dst: dst.into(),
dst: dst.clone().into(),
src: self.get_src(&srcs[0]),
src_type,
dst_type,
@ -2281,7 +2281,7 @@ impl<'a> ShaderFromNir<'a> {
let src_type =
FloatType::from_bits(src_bit_size.into());
b.push_op(OpF2F {
dst: dst.into(),
dst: dst.clone().into(),
src: self.get_src(&srcs[0]),
src_type,
dst_type,
@ -2511,7 +2511,7 @@ impl<'a> ShaderFromNir<'a> {
dst: if self.sm.sm() >= 70 && is_reduction {
Dst::None
} else {
dst.into()
dst.clone().into()
},
fault: Dst::None,
handle: handle,
@ -2559,7 +2559,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
b.push_op(OpSuLd {
dst: dst.into(),
dst: dst.clone().into(),
fault: Dst::None,
image_access,
image_dim: dim,
@ -2594,10 +2594,10 @@ impl<'a> ShaderFromNir<'a> {
ImageAccess::Formatted(ChannelMask::for_comps(comps - 1));
let dst = b.alloc_ssa_vec(RegFile::GPR, comps - 1);
let fault = b.alloc_ssa_vec(RegFile::Pred, 1);
let fault = b.alloc_ssa(RegFile::Pred);
b.push_op(OpSuLd {
dst: dst.into(),
dst: dst.clone().into(),
fault: fault.into(),
image_access,
image_dim: dim,
@ -2716,7 +2716,11 @@ impl<'a> ShaderFromNir<'a> {
atom_op.is_reduction() && intrin.def.components_read() == 0;
b.push_op(OpAtom {
dst: if is_reduction { Dst::None } else { dst.into() },
dst: if is_reduction {
Dst::None
} else {
dst.clone().into()
},
addr: addr,
cmpr: 0.into(),
data: data,
@ -2741,7 +2745,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
b.push_op(OpAtom {
dst: dst.into(),
dst: dst.clone().into(),
addr: addr,
cmpr: cmpr,
data: data,
@ -2847,7 +2851,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.into(),
dst: dst.clone().into(),
addr: addr,
offset: offset,
access: access,
@ -2873,7 +2877,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, 2);
b.push_op(OpLdTram {
dst: dst.into(),
dst: dst.clone().into(),
addr,
use_c,
});
@ -2931,7 +2935,7 @@ impl<'a> ShaderFromNir<'a> {
let comps = 2;
let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
b.push_op(OpALd {
dst: dst.into(),
dst: dst.clone().into(),
vtx: vtx.into(),
addr: NAK_ATTR_TESS_COORD,
offset: 0.into(),
@ -2956,7 +2960,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.into(),
dst: dst.clone().into(),
addr: addr,
offset: offset,
access: access,
@ -2978,7 +2982,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4));
b.push_op(OpLd {
dst: dst.into(),
dst: dst.clone().into(),
addr: addr,
offset: offset,
access: access,
@ -2998,13 +3002,13 @@ impl<'a> ShaderFromNir<'a> {
{
debug_assert!(idx + comps <= NAK_SV_CLOCK + 2);
b.push_op(OpCS2R {
dst: dst.into(),
dst: dst.clone().into(),
idx: idx,
});
} else {
debug_assert!(intrin.def.bit_size == 32);
b.push_op(OpS2R {
dst: dst.into(),
dst: dst.clone().into(),
idx: idx,
});
}
@ -3028,11 +3032,14 @@ impl<'a> ShaderFromNir<'a> {
if off.is_zero() {
for (i, comp) in dst.iter().enumerate() {
let i = u16::try_from(i).unwrap();
b.copy_to((*comp).into(), cb.offset(i * 4).into());
b.copy_to(
(*comp).into(),
cb.clone().offset(i * 4).into(),
);
}
} else {
b.push_op(OpLdc {
dst: dst.into(),
dst: dst.clone().into(),
cb: cb.into(),
offset: off,
mode: LdcMode::Indexed,
@ -3055,7 +3062,7 @@ impl<'a> ShaderFromNir<'a> {
offset: off_imm,
};
b.push_op(OpLdc {
dst: dst.into(),
dst: dst.clone().into(),
cb: cb.into(),
offset: off_idx.into(),
mode: LdcMode::IndexedSegmented,
@ -3080,11 +3087,14 @@ impl<'a> ShaderFromNir<'a> {
if off.is_zero() {
for (i, comp) in dst.iter().enumerate() {
let i = u16::try_from(i).unwrap();
b.copy_to((*comp).into(), cb.offset(i * 4).into());
b.copy_to(
(*comp).into(),
cb.clone().offset(i * 4).into(),
);
}
} else {
b.push_op(OpLdc {
dst: dst.into(),
dst: dst.clone().into(),
cb: cb.into(),
offset: off,
mode: LdcMode::Indexed,
@ -3096,14 +3106,14 @@ impl<'a> ShaderFromNir<'a> {
nir_intrinsic_pin_cx_handle_nv => {
let handle = self.get_ssa_ref(&srcs[0]);
b.push_op(OpPin {
src: handle.into(),
src: handle.clone().into(),
dst: handle.into(),
});
}
nir_intrinsic_unpin_cx_handle_nv => {
let handle = self.get_ssa_ref(&srcs[0]);
b.push_op(OpUnpin {
src: handle.into(),
src: handle.clone().into(),
dst: handle.into(),
});
}
@ -3227,7 +3237,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
b.push_op(OpAtom {
dst: dst.into(),
dst: dst.clone().into(),
addr: addr,
cmpr: 0.into(),
data: data,
@ -3252,7 +3262,7 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32));
b.push_op(OpAtom {
dst: dst.into(),
dst: dst.clone().into(),
addr: addr,
cmpr: cmpr,
data: data,
@ -3723,7 +3733,8 @@ impl<'a> ShaderFromNir<'a> {
for ps in np.iter_srcs() {
if ps.pred().index == nb.index {
let src = *self.get_src(&ps.src).as_ssa().unwrap();
let src = self.get_src(&ps.src);
let src = src.as_ssa().unwrap();
for (i, src) in src.iter().enumerate() {
let phi_id =
phi_map.get_phi_id(np, i.try_into().unwrap());

View file

@ -150,8 +150,8 @@ impl<'a> TestShaderBuilder<'a> {
let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap();
let dst = self.alloc_ssa_vec(RegFile::GPR, comps);
self.push_op(OpLd {
dst: dst.into(),
addr: self.data_addr.into(),
dst: dst.clone().into(),
addr: self.data_addr.clone().into(),
offset: offset.into(),
access: access,
});
@ -173,7 +173,7 @@ impl<'a> TestShaderBuilder<'a> {
let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap();
assert!(data.comps() == comps);
self.push_op(OpSt {
addr: self.data_addr.into(),
addr: self.data_addr.clone().into(),
data: data.into(),
offset: offset.into(),
access: access,

View file

@ -421,7 +421,7 @@ impl fmt::Display for SSAValue {
/// designed so that it is always 16B, regardless of how many SSA values are
/// referenced so it's easy and fairly cheap to copy around and embed in other
/// structures.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct SSARef {
v: [SSAValue; 4],
}
@ -743,7 +743,7 @@ impl fmt::Display for Dst {
}
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
#[derive(Clone, Eq, Hash, PartialEq)]
pub enum CBuf {
Binding(u8),
@ -764,7 +764,7 @@ impl fmt::Display for CBuf {
}
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct CBufRef {
pub buf: CBuf,
pub offset: u16,
@ -1245,7 +1245,7 @@ impl Src {
}
pub fn as_bool(&self) -> Option<bool> {
match self.src_ref {
match &self.src_ref {
SrcRef::True => Some(!self.src_mod.is_bnot()),
SrcRef::False => Some(self.src_mod.is_bnot()),
SrcRef::SSA(vec) => {
@ -1298,7 +1298,7 @@ impl Src {
}
pub fn is_uniform(&self) -> bool {
match self.src_ref {
match &self.src_ref {
SrcRef::Zero
| SrcRef::True
| SrcRef::False
@ -5376,7 +5376,7 @@ pub struct OpLdc {
impl DisplayOp for OpLdc {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let SrcRef::CBuf(cb) = self.cb.src_ref else {
let SrcRef::CBuf(cb) = &self.cb.src_ref else {
panic!("Not a cbuf");
};
write!(f, "ldc{}{} {}[", self.mode, self.mem_type, cb.buf)?;

View file

@ -26,7 +26,7 @@ pub fn src_is_upred_reg(src: &Src) -> bool {
}
pub fn src_is_reg(src: &Src, reg_file: RegFile) -> bool {
match src.src_ref {
match &src.src_ref {
SrcRef::Zero | SrcRef::True | SrcRef::False => true,
SrcRef::SSA(ssa) => ssa.file() == Some(reg_file),
SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
@ -143,10 +143,12 @@ pub trait LegalizeBuildHelpers: SSABuilder {
}));
}
let old_src_ref =
std::mem::replace(&mut src.src_ref, val.clone().into());
if val.comps() == 1 {
self.copy_to(val.into(), src.src_ref.clone().into());
self.copy_to(val[0].into(), old_src_ref.into());
} else {
match src.src_ref {
match old_src_ref {
SrcRef::Imm32(u) => {
// Immediates go in the top bits
self.copy_to(val[0].into(), 0.into());
@ -154,7 +156,7 @@ pub trait LegalizeBuildHelpers: SSABuilder {
}
SrcRef::CBuf(cb) => {
// CBufs load 8B
self.copy_to(val[0].into(), cb.into());
self.copy_to(val[0].into(), cb.clone().into());
self.copy_to(val[1].into(), cb.offset(4).into());
}
SrcRef::SSA(vec) => {
@ -165,8 +167,6 @@ pub trait LegalizeBuildHelpers: SSABuilder {
_ => panic!("Invalid 64-bit SrcRef"),
}
}
src.src_ref = val.into();
}
fn copy_alu_src_if_not_reg(
@ -259,7 +259,7 @@ pub trait LegalizeBuildHelpers: SSABuilder {
}
SrcType::F64 => {
let val = self.alloc_ssa_vec(reg_file, 2);
let old_src = std::mem::replace(src, val.into());
let old_src = std::mem::replace(src, val.clone().into());
self.push_op(OpDAdd {
dst: val.into(),
srcs: [Src::ZERO.fneg(), old_src],
@ -451,11 +451,11 @@ fn legalize_instr(
// okay. Just make it look the same as the previous source we
// fixed up.
if let Some(new_vec) = vec_src_map.get(vec) {
src.src_ref = (*new_vec).into();
src.src_ref = new_vec.clone().into();
continue;
}
let mut new_vec = *vec;
let mut new_vec = vec.clone();
for c in 0..vec.comps() {
let ssa = vec[usize::from(c)];
// If the same SSA value shows up in multiple non-identical
@ -471,7 +471,7 @@ fn legalize_instr(
}
}
vec_src_map.insert(*vec, new_vec);
vec_src_map.insert(vec.clone(), new_vec.clone());
src.src_ref = new_vec.into();
}
}
@ -492,7 +492,7 @@ impl Shader<'_> {
for (ip, mut instr) in b.instrs.drain(..).enumerate() {
if let Op::Pin(pin) = &instr.op {
if let Dst::SSA(ssa) = &pin.dst {
pinned.insert(*ssa);
pinned.insert(ssa.clone());
}
}

View file

@ -219,7 +219,7 @@ impl BarPropPass {
let mut bmovs = Vec::new();
for (idx, dst) in op.dsts.iter_mut() {
if self.phi_is_bar.get((*idx).try_into().unwrap()) {
let ssa = *dst.as_ssa().unwrap();
let ssa = dst.as_ssa().unwrap().clone();
let bar = *self.ssa_map.get(&ssa[0]).unwrap();
*dst = bar.into();

View file

@ -121,7 +121,7 @@ impl CopyPropPass {
self.add_copy(bi, dst[1], SrcType::F64, src);
}
SrcRef::CBuf(cb) => {
let lo32 = Src::from(SrcRef::CBuf(cb));
let lo32 = Src::from(SrcRef::CBuf(cb.clone()));
let hi32 = Src {
src_ref: SrcRef::CBuf(cb.offset(4)),
src_mod: src.src_mod,
@ -161,7 +161,7 @@ impl CopyPropPass {
return;
};
match entry.src.src_ref {
match &entry.src.src_ref {
SrcRef::True => {
pred.pred_ref = PredRef::None;
}
@ -196,7 +196,7 @@ impl CopyPropPass {
};
if entry.src.src_mod.is_none() {
if let SrcRef::SSA(entry_ssa) = entry.src.src_ref {
if let SrcRef::SSA(entry_ssa) = &entry.src.src_ref {
assert!(entry_ssa.comps() == 1);
*c_ssa = entry_ssa[0];
progress = true;
@ -395,7 +395,7 @@ impl CopyPropPass {
let lo_entry_or_none = self.get_copy(&src_ssa[0]);
if let Some(CopyPropEntry::Copy(lo_entry)) = lo_entry_or_none {
if lo_entry.src.src_mod.is_none() {
if let SrcRef::SSA(lo_entry_ssa) = lo_entry.src.src_ref {
if let SrcRef::SSA(lo_entry_ssa) = &lo_entry.src.src_ref {
src_ssa[0] = lo_entry_ssa[0];
continue;
}
@ -407,7 +407,7 @@ impl CopyPropPass {
if hi_entry.src.src_mod.is_none()
|| hi_entry.src_type == SrcType::F64
{
if let SrcRef::SSA(hi_entry_ssa) = hi_entry.src.src_ref {
if let SrcRef::SSA(hi_entry_ssa) = &hi_entry.src.src_ref {
src_ssa[1] = hi_entry_ssa[0];
src.src_mod = hi_entry.src.src_mod.modify(src.src_mod);
continue;
@ -439,8 +439,8 @@ impl CopyPropPass {
return;
}
let new_src_ref = match hi_entry.src.src_ref {
SrcRef::Zero => match lo_entry.src.src_ref {
let new_src_ref = match &hi_entry.src.src_ref {
SrcRef::Zero => match &lo_entry.src.src_ref {
SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Zero,
_ => return,
},
@ -448,11 +448,11 @@ impl CopyPropPass {
// 32-bit immediates for f64 sources are the top 32 bits
// with zero in the lower 32.
match lo_entry.src.src_ref {
SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Imm32(i),
SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Imm32(*i),
_ => return,
}
}
SrcRef::CBuf(hi_cb) => match lo_entry.src.src_ref {
SrcRef::CBuf(hi_cb) => match &lo_entry.src.src_ref {
SrcRef::CBuf(lo_cb) => {
if hi_cb.buf != lo_cb.buf {
return;
@ -463,7 +463,7 @@ impl CopyPropPass {
if hi_cb.offset != lo_cb.offset + 4 {
return;
}
SrcRef::CBuf(lo_cb)
SrcRef::CBuf(lo_cb.clone())
}
_ => return,
},
@ -591,7 +591,7 @@ impl CopyPropPass {
}
Op::PLop3(lop) => {
for i in 0..2 {
let dst = match lop.dsts[i] {
let dst = match &lop.dsts[i] {
Dst::SSA(vec) => {
assert!(vec.comps() == 1);
vec[0]

View file

@ -36,7 +36,7 @@ impl LopPass {
}
for src in instr.srcs() {
if let SrcRef::SSA(vec) = src.src_ref {
if let SrcRef::SSA(vec) = &src.src_ref {
for ssa in vec.iter() {
use_counts
.entry(*ssa)
@ -105,7 +105,7 @@ impl LopPass {
) {
loop {
assert!(srcs[src_idx].src_mod.is_none());
let ssa = match srcs[src_idx].src_ref {
let ssa = match &srcs[src_idx].src_ref {
SrcRef::SSA(vec) => {
assert!(vec.comps() == 1);
vec[0]
@ -213,7 +213,7 @@ impl LopPass {
self.try_prop_to_src(slice::from_mut(&mut op.op), &mut op.srcs, i);
}
if let Dst::SSA(ssa) = op.dst {
if let Dst::SSA(ssa) = &op.dst {
assert!(ssa.comps() == 1);
self.add_lop(ssa[0], op.op, op.srcs.clone());
}
@ -246,7 +246,7 @@ impl LopPass {
}
for i in 0..2 {
if let Dst::SSA(ssa) = op.dsts[i] {
if let Dst::SSA(ssa) = &op.dsts[i] {
assert!(ssa.comps() == 1);
self.add_lop(ssa[0], op.ops[i], op.srcs.clone());
}

View file

@ -17,7 +17,7 @@ struct PrmtSrcs {
impl PrmtSrcs {
fn new() -> PrmtSrcs {
PrmtSrcs {
srcs: [const { SrcRef::Zero }; 2],
srcs: [SrcRef::Zero, SrcRef::Zero],
num_srcs: 0,
imm_src: usize::MAX,
num_imm_bytes: 0,
@ -93,7 +93,7 @@ impl PrmtPass {
}
fn add_prmt(&mut self, op: &OpPrmt) {
let Dst::SSA(dst_ssa) = op.dst else {
let Dst::SSA(dst_ssa) = &op.dst else {
return;
};
debug_assert!(dst_ssa.comps() == 1);

View file

@ -142,11 +142,11 @@ impl AluSrc {
assert!(src.src_swizzle.is_none());
// do not assert src_mod, can be encoded by opcode.
match src.src_ref {
match &src.src_ref {
SrcRef::Zero => AluSrc::Reg(zero_reg()),
SrcRef::Reg(r) => AluSrc::Reg(r),
SrcRef::Imm32(x) => AluSrc::Imm(x),
SrcRef::CBuf(x) => AluSrc::CBuf(x),
SrcRef::Reg(r) => AluSrc::Reg(*r),
SrcRef::Imm32(x) => AluSrc::Imm(*x),
SrcRef::CBuf(x) => AluSrc::CBuf(x.clone()),
_ => panic!("Unhandled ALU src type"),
}
} else {
@ -2135,20 +2135,20 @@ fn atom_src_as_ssa(
atom_type: AtomType,
) -> SSARef {
if let Some(ssa) = src.as_ssa() {
return *ssa;
return ssa.clone();
}
let tmp;
if atom_type.bits() == 32 {
tmp = b.alloc_ssa_vec(RegFile::GPR, 1);
let tmp = b.alloc_ssa(RegFile::GPR);
b.copy_to(tmp.into(), 0.into());
tmp.into()
} else {
debug_assert!(atom_type.bits() == 64);
tmp = b.alloc_ssa_vec(RegFile::GPR, 2);
let tmp = b.alloc_ssa_vec(RegFile::GPR, 2);
b.copy_to(tmp[0].into(), 0.into());
b.copy_to(tmp[1].into(), 0.into());
tmp
}
tmp
}
impl SM20Op for OpAtom {

View file

@ -11,7 +11,7 @@ use std::collections::HashMap;
use std::ops::Range;
pub fn instr_latency(_sm: u8, op: &Op, dst_idx: usize) -> u32 {
let file = match op.dsts_as_slice()[dst_idx] {
let file = match &op.dsts_as_slice()[dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),
@ -2651,20 +2651,20 @@ fn atom_src_as_ssa(
atom_type: AtomType,
) -> SSARef {
if let Some(ssa) = src.as_ssa() {
return *ssa;
return ssa.clone();
}
let tmp;
if atom_type.bits() == 32 {
tmp = b.alloc_ssa_vec(RegFile::GPR, 1);
let tmp = b.alloc_ssa(RegFile::GPR);
b.copy_to(tmp.into(), 0.into());
tmp.into()
} else {
debug_assert!(atom_type.bits() == 64);
tmp = b.alloc_ssa_vec(RegFile::GPR, 2);
let tmp = b.alloc_ssa_vec(RegFile::GPR, 2);
b.copy_to(tmp[0].into(), 0.into());
b.copy_to(tmp[1].into(), 0.into());
tmp
}
tmp
}
impl SM50Op for OpAtom {

View file

@ -22,7 +22,7 @@ impl ShaderModel70 {
}
fn instr_latency(&self, op: &Op, dst_idx: usize) -> u32 {
let file = match op.dsts_as_slice()[dst_idx] {
let file = match &op.dsts_as_slice()[dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),

View file

@ -282,7 +282,7 @@ impl ALUSrc {
return ALUSrc::None;
};
match src.src_ref {
match &src.src_ref {
SrcRef::Zero | SrcRef::Reg(_) => {
let reg = match src.src_ref {
SrcRef::Zero => {
@ -317,11 +317,11 @@ impl ALUSrc {
SrcRef::Imm32(i) => {
assert!(src.src_mod.is_none());
assert!(src.src_swizzle.is_none());
ALUSrc::Imm32(i)
ALUSrc::Imm32(*i)
}
SrcRef::CBuf(cb) => {
let alu_ref = ALUCBufRef {
cb: cb,
cb: cb.clone(),
abs: src_mod_has_abs(src.src_mod),
neg: src_mod_has_neg(src.src_mod),
swizzle: src.src_swizzle,

View file

@ -1170,7 +1170,7 @@ impl SM75Latency {
read: Option<&Op>,
src_idx: usize,
) -> u32 {
let dst_file = match write.dsts_as_slice()[dst_idx] {
let dst_file = match &write.dsts_as_slice()[dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),
@ -1233,7 +1233,7 @@ impl SM75Latency {
}
pub fn war(read: &Op, src_idx: usize, write: &Op, dst_idx: usize) -> u32 {
let dst_file = match write.dsts_as_slice()[dst_idx] {
let dst_file = match &write.dsts_as_slice()[dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),
@ -1291,7 +1291,7 @@ impl SM75Latency {
b_dst_idx: usize,
a_op_pred: bool,
) -> u32 {
let dst_file = match a.dsts_as_slice()[a_dst_idx] {
let dst_file = match &a.dsts_as_slice()[a_dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),

View file

@ -1406,7 +1406,7 @@ impl SM80Latency {
read: Option<&Op>,
src_idx: usize,
) -> u32 {
let dst_file = match write.dsts_as_slice()[dst_idx] {
let dst_file = match &write.dsts_as_slice()[dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),
@ -1467,7 +1467,7 @@ impl SM80Latency {
}
pub fn war(read: &Op, src_idx: usize, write: &Op, dst_idx: usize) -> u32 {
let dst_file = match write.dsts_as_slice()[dst_idx] {
let dst_file = match &write.dsts_as_slice()[dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),
@ -1521,7 +1521,7 @@ impl SM80Latency {
b_dst_idx: usize,
a_op_pred: bool,
) -> u32 {
let dst_file = match a.dsts_as_slice()[a_dst_idx] {
let dst_file = match &a.dsts_as_slice()[a_dst_idx] {
Dst::None => return 0,
Dst::SSA(vec) => vec.file().unwrap(),
Dst::Reg(reg) => reg.file(),

View file

@ -301,7 +301,7 @@ impl Function {
if let Some(phi) = b.phi_srcs() {
for (idx, src) in phi.srcs.iter() {
if let SrcRef::SSA(vec) = src.src_ref {
if let SrcRef::SSA(vec) = &src.src_ref {
debug_assert!(vec.comps() == 1);
cg.add_ssa(vec[0]);
}