From dee3a0aa58f288bbd0069be8e1c83dab9c2e2391 Mon Sep 17 00:00:00 2001 From: Mel Henning Date: Wed, 30 Apr 2025 20:35:23 -0400 Subject: [PATCH] nak: CBuf and SSARef are no longer Copy Reviewed-by: Faith Ekstrand Part-of: --- src/nouveau/compiler/nak/assign_regs.rs | 26 ++-- src/nouveau/compiler/nak/builder.rs | 4 +- src/nouveau/compiler/nak/from_nir.rs | 111 ++++++++++-------- src/nouveau/compiler/nak/hw_tests.rs | 6 +- src/nouveau/compiler/nak/ir.rs | 12 +- src/nouveau/compiler/nak/legalize.rs | 22 ++-- src/nouveau/compiler/nak/opt_bar_prop.rs | 2 +- src/nouveau/compiler/nak/opt_copy_prop.rs | 22 ++-- src/nouveau/compiler/nak/opt_lop.rs | 8 +- src/nouveau/compiler/nak/opt_prmt.rs | 4 +- src/nouveau/compiler/nak/sm20.rs | 18 +-- src/nouveau/compiler/nak/sm50.rs | 12 +- src/nouveau/compiler/nak/sm70.rs | 2 +- src/nouveau/compiler/nak/sm70_encode.rs | 6 +- .../compiler/nak/sm75_instr_latencies.rs | 6 +- .../compiler/nak/sm80_instr_latencies.rs | 6 +- src/nouveau/compiler/nak/to_cssa.rs | 2 +- 17 files changed, 140 insertions(+), 129 deletions(-) diff --git a/src/nouveau/compiler/nak/assign_regs.rs b/src/nouveau/compiler/nak/assign_regs.rs index 44313b3ccde..04739c419ff 100644 --- a/src/nouveau/compiler/nak/assign_regs.rs +++ b/src/nouveau/compiler/nak/assign_regs.rs @@ -92,14 +92,14 @@ impl SSAUseMap { v.push((ip, SSAUse::FixedReg(reg))); } - fn add_vec_use(&mut self, ip: usize, vec: SSARef) { + fn add_vec_use(&mut self, ip: usize, vec: &SSARef) { if vec.comps() == 1 { return; } for ssa in vec.iter() { let v = self.ssa_map.entry(*ssa).or_default(); - v.push((ip, SSAUse::Vec(vec))); + v.push((ip, SSAUse::Vec(vec.clone()))); } } @@ -133,7 +133,7 @@ impl SSAUseMap { // We don't care about predicates because they're scalar for src in instr.srcs() { if let Some(ssa) = src_ssa_ref(src) { - self.add_vec_use(ip, *ssa); + self.add_vec_use(ip, ssa); } } } @@ -530,7 +530,7 @@ impl<'a> VecRegAllocator<'a> { RegRef::new(self.file(), reg, 1) } - pub fn assign_pin_vec_reg(&mut self, vec: SSARef, reg: u32) -> RegRef { + pub fn assign_pin_vec_reg(&mut self, vec: &SSARef, reg: u32) -> RegRef { for c in 0..vec.comps() { let ssa = vec[usize::from(c)]; self.assign_pin_reg(ssa, reg + u32::from(c)); @@ -675,12 +675,12 @@ impl<'a> VecRegAllocator<'a> { RegRef::new(self.file(), reg, comps) } - pub fn alloc_vector(&mut self, vec: SSARef) -> RegRef { + pub fn alloc_vector(&mut self, vec: &SSARef) -> RegRef { let comps = vec.comps(); let align = u32::from(comps).next_power_of_two(); if let Some(reg) = self.ra.try_find_unused_reg_range(0, align, comps) { - return self.assign_pin_vec_reg(vec, reg); + return self.assign_pin_vec_reg(&vec, reg); } let reg = self @@ -690,7 +690,7 @@ impl<'a> VecRegAllocator<'a> { for c in 0..comps { self.evict_reg_if_used(reg + u32::from(c)); } - self.assign_pin_vec_reg(vec, reg) + self.assign_pin_vec_reg(&vec, reg) } pub fn free_killed(&mut self, killed: &KillSet) { @@ -817,7 +817,7 @@ fn instr_assign_regs_file( for ssa in vec.iter() { avail.remove(ssa); } - killed_vecs.push(*vec); + killed_vecs.push(vec.clone()); } } } @@ -865,7 +865,7 @@ fn instr_assign_regs_file( for vec_dst in vec_dsts { let dst = &mut instr.dsts_mut()[vec_dst.dst_idx]; *dst = vra - .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg) + .assign_pin_vec_reg(dst.as_ssa().unwrap(), vec_dst.reg) .into(); } @@ -877,7 +877,7 @@ fn instr_assign_regs_file( for vec_dst in vec_dsts { let dst = &mut instr.dsts_mut()[vec_dst.dst_idx]; *dst = vra - .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg) + .assign_pin_vec_reg(dst.as_ssa().unwrap(), vec_dst.reg) .into(); } @@ -894,7 +894,7 @@ fn instr_assign_regs_file( for dst in instr.dsts_mut() { if let Dst::SSA(ssa) = dst { if ssa.file().unwrap() == vra.file() && ssa.comps() > 1 { - *dst = vra.alloc_vector(*ssa).into(); + *dst = vra.alloc_vector(ssa).into(); } } } @@ -1008,7 +1008,7 @@ impl AssignRegsBlock { ) -> Option> { match &mut instr.op { Op::Undef(undef) => { - if let Dst::SSA(ssa) = undef.dst { + if let Dst::SSA(ssa) = &undef.dst { assert!(ssa.comps() == 1); self.alloc_scalar(ip, sum, phi_webs, ssa[0]); } @@ -1164,7 +1164,7 @@ impl AssignRegsBlock { let dst_ra = &mut self.ra[dst_vec.file().unwrap()]; let mut vra = VecRegAllocator::new(dst_ra); - let dst_reg = vra.alloc_vector(*dst_vec); + let dst_reg = vra.alloc_vector(dst_vec); vra.finish(pcopy); let mut pin_copy = OpParCopy::new(); diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs index 926e8835b8d..612cdfce034 100644 --- a/src/nouveau/compiler/nak/builder.rs +++ b/src/nouveau/compiler/nak/builder.rs @@ -23,7 +23,7 @@ pub trait Builder { } fn lop2_to(&mut self, dst: Dst, op: LogicOp2, mut x: Src, mut y: Src) { - let is_predicate = match dst { + let is_predicate = match &dst { Dst::None => panic!("No LOP destination"), Dst::SSA(ssa) => ssa.is_predicate(), Dst::Reg(reg) => reg.is_predicate(), @@ -462,7 +462,7 @@ pub trait SSABuilder: Builder { let dst = self.alloc_ssa_vec(RegFile::GPR, 2); if self.sm() >= 70 { self.push_op(OpIMad64 { - dst: dst.into(), + dst: dst.clone().into(), srcs: [x, y, 0.into()], signed, }); diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index d990af909f6..b44dc93bb87 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -813,7 +813,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, dst_bits.div_ceil(32)); b.push_op(OpF2F { - dst: dst.into(), + dst: dst.clone().into(), src: srcs(0), src_type: FloatType::from_bits(src_bits.into()), dst_type: dst_type, @@ -865,7 +865,7 @@ impl<'a> ShaderFromNir<'a> { ftz: self.float_ctl[src_type].ftz, }); b.push_op(OpI2I { - dst: dst.into(), + dst: dst.clone().into(), src: tmp.into(), src_type: tmp_type, dst_type, @@ -875,7 +875,7 @@ impl<'a> ShaderFromNir<'a> { }); } else { b.push_op(OpF2I { - dst: dst.into(), + dst: dst.clone().into(), src: srcs(0), src_type, dst_type, @@ -897,14 +897,14 @@ impl<'a> ShaderFromNir<'a> { if alu.def.bit_size() == 64 { dst = b.alloc_ssa_vec(RegFile::GPR, 2); b.push_op(OpDAdd { - dst: dst.into(), + dst: dst.clone().into(), srcs: [x, y], rnd_mode: self.float_ctl[ftype].rnd_mode, }); } else if alu.def.bit_size() == 32 { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpFAdd { - dst: dst.into(), + dst: dst.clone().into(), srcs: [x, y], saturate: self.try_saturate_alu_dst(&alu.def), rnd_mode: self.float_ctl[ftype].rnd_mode, @@ -917,7 +917,7 @@ impl<'a> ShaderFromNir<'a> { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpHAdd2 { - dst: dst.into(), + dst: dst.clone().into(), srcs: [restrict_f16v2_src(x), restrict_f16v2_src(y)], saturate: self.try_saturate_alu_dst(&alu.def), ftz: self.float_ctl[ftype].ftz, @@ -984,7 +984,7 @@ impl<'a> ShaderFromNir<'a> { if alu.get_src(0).bit_size() == 64 { assert!(alu.def.num_components == 1); b.push_op(OpDSetP { - dst: dst.into(), + dst: dst.clone().into(), set_op: PredSetOp::And, cmp_op: cmp_op, srcs: [srcs(0), srcs(1)], @@ -993,7 +993,7 @@ impl<'a> ShaderFromNir<'a> { } else if alu.get_src(0).bit_size() == 32 { assert!(alu.def.num_components == 1); b.push_op(OpFSetP { - dst: dst.into(), + dst: dst.clone().into(), set_op: PredSetOp::And, cmp_op: cmp_op, srcs: [srcs(0), srcs(1)], @@ -1040,14 +1040,14 @@ impl<'a> ShaderFromNir<'a> { debug_assert!(!self.float_ctl[ftype].ftz); dst = b.alloc_ssa_vec(RegFile::GPR, 2); b.push_op(OpDFma { - dst: dst.into(), + dst: dst.clone().into(), srcs: [srcs(0), srcs(1), srcs(2)], rnd_mode: self.float_ctl[ftype].rnd_mode, }); } else if alu.def.bit_size() == 32 { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpFFma { - dst: dst.into(), + dst: dst.clone().into(), srcs: [srcs(0), srcs(1), srcs(2)], saturate: self.try_saturate_alu_dst(&alu.def), rnd_mode: self.float_ctl[ftype].rnd_mode, @@ -1063,7 +1063,7 @@ impl<'a> ShaderFromNir<'a> { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpHFma2 { - dst: dst.into(), + dst: dst.clone().into(), srcs: [ restrict_f16v2_src(srcs(0)), restrict_f16v2_src(srcs(1)), @@ -1105,14 +1105,14 @@ impl<'a> ShaderFromNir<'a> { if alu.def.bit_size() == 64 { dst = b.alloc_ssa_vec(RegFile::GPR, 2); b.push_op(OpDMnMx { - dst: dst.into(), + dst: dst.clone().into(), srcs: [srcs(0), srcs(1)], min: (alu.op == nir_op_fmin).into(), }); } else if alu.def.bit_size() == 32 { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpFMnMx { - dst: dst.into(), + dst: dst.clone().into(), srcs: [srcs(0), srcs(1)], min: (alu.op == nir_op_fmin).into(), ftz: self.float_ctl.fp32.ftz, @@ -1120,7 +1120,7 @@ impl<'a> ShaderFromNir<'a> { } else if alu.def.bit_size() == 16 { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpHMnMx2 { - dst: dst.into(), + dst: dst.clone().into(), srcs: [ restrict_f16v2_src(srcs(0)), restrict_f16v2_src(srcs(1)), @@ -1140,14 +1140,14 @@ impl<'a> ShaderFromNir<'a> { debug_assert!(!self.float_ctl[ftype].ftz); dst = b.alloc_ssa_vec(RegFile::GPR, 2); b.push_op(OpDMul { - dst: dst.into(), + dst: dst.clone().into(), srcs: [srcs(0), srcs(1)], rnd_mode: self.float_ctl[ftype].rnd_mode, }); } else if alu.def.bit_size() == 32 { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpFMul { - dst: dst.into(), + dst: dst.clone().into(), srcs: [srcs(0), srcs(1)], saturate: self.try_saturate_alu_dst(&alu.def), rnd_mode: self.float_ctl[ftype].rnd_mode, @@ -1161,7 +1161,7 @@ impl<'a> ShaderFromNir<'a> { dst = b.alloc_ssa_vec(RegFile::GPR, 1); b.push_op(OpHMul2 { - dst: dst.into(), + dst: dst.clone().into(), srcs: [ restrict_f16v2_src(srcs(0)), restrict_f16v2_src(srcs(1)), @@ -1310,7 +1310,7 @@ impl<'a> ShaderFromNir<'a> { let dst_type = FloatType::from_bits(dst_bits.into()); let dst = b.alloc_ssa_vec(RegFile::GPR, dst_bits.div_ceil(32)); b.push_op(OpI2F { - dst: dst.into(), + dst: dst.clone().into(), src: srcs(0), dst_type: dst_type, src_type: IntType::from_bits(src_bits.into(), true), @@ -1411,8 +1411,8 @@ impl<'a> ShaderFromNir<'a> { dst.into() } nir_op_ige | nir_op_ilt | nir_op_uge | nir_op_ult => { - let x = *srcs(0).as_ssa().unwrap(); - let y = *srcs(1).as_ssa().unwrap(); + let x = srcs(0).to_ssa(); + let y = srcs(1).to_ssa(); let (cmp_type, cmp_op) = match alu.op { nir_op_ige => (IntCmpType::I32, IntCmpOp::Ge), nir_op_ilt => (IntCmpType::I32, IntCmpOp::Lt), @@ -1624,7 +1624,7 @@ impl<'a> ShaderFromNir<'a> { let dst_type = FloatType::from_bits(dst_bits.into()); let dst = b.alloc_ssa_vec(RegFile::GPR, dst_bits.div_ceil(32)); b.push_op(OpI2F { - dst: dst.into(), + dst: dst.clone().into(), src: srcs(0), dst_type: dst_type, src_type: IntType::from_bits(src_bits.into(), false), @@ -1827,7 +1827,7 @@ impl<'a> ShaderFromNir<'a> { dsts[0] = SSARef::try_from(&dst[0..2]).unwrap().into(); dsts[1] = SSARef::try_from(&dst[2..]).unwrap().into(); } else { - dsts[0] = dst.into(); + dsts[0] = dst.clone().into(); } let fault = if flags.is_sparse() { @@ -2164,7 +2164,7 @@ impl<'a> ShaderFromNir<'a> { assert!(intrin.def.bit_size() == 32); let dst = b.alloc_ssa_vec(RegFile::GPR, comps); b.push_op(OpALd { - dst: dst.into(), + dst: dst.clone().into(), vtx, addr, offset, @@ -2233,7 +2233,7 @@ impl<'a> ShaderFromNir<'a> { src_base_type == ALUType::INT, ); b.push_op(OpI2I { - dst: dst.into(), + dst: dst.clone().into(), src: self.get_src(&srcs[0]), src_type, dst_type, @@ -2249,7 +2249,7 @@ impl<'a> ShaderFromNir<'a> { // pre-Volta assert!(b.sm() >= 70 || dst_bit_size > 8); b.push_op(OpF2I { - dst: dst.into(), + dst: dst.clone().into(), src: self.get_src(&srcs[0]), src_type, dst_type, @@ -2270,7 +2270,7 @@ impl<'a> ShaderFromNir<'a> { src_base_type == ALUType::INT, ); b.push_op(OpI2F { - dst: dst.into(), + dst: dst.clone().into(), src: self.get_src(&srcs[0]), src_type, dst_type, @@ -2281,7 +2281,7 @@ impl<'a> ShaderFromNir<'a> { let src_type = FloatType::from_bits(src_bit_size.into()); b.push_op(OpF2F { - dst: dst.into(), + dst: dst.clone().into(), src: self.get_src(&srcs[0]), src_type, dst_type, @@ -2511,7 +2511,7 @@ impl<'a> ShaderFromNir<'a> { dst: if self.sm.sm() >= 70 && is_reduction { Dst::None } else { - dst.into() + dst.clone().into() }, fault: Dst::None, handle: handle, @@ -2559,7 +2559,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, comps); b.push_op(OpSuLd { - dst: dst.into(), + dst: dst.clone().into(), fault: Dst::None, image_access, image_dim: dim, @@ -2594,10 +2594,10 @@ impl<'a> ShaderFromNir<'a> { ImageAccess::Formatted(ChannelMask::for_comps(comps - 1)); let dst = b.alloc_ssa_vec(RegFile::GPR, comps - 1); - let fault = b.alloc_ssa_vec(RegFile::Pred, 1); + let fault = b.alloc_ssa(RegFile::Pred); b.push_op(OpSuLd { - dst: dst.into(), + dst: dst.clone().into(), fault: fault.into(), image_access, image_dim: dim, @@ -2716,7 +2716,11 @@ impl<'a> ShaderFromNir<'a> { atom_op.is_reduction() && intrin.def.components_read() == 0; b.push_op(OpAtom { - dst: if is_reduction { Dst::None } else { dst.into() }, + dst: if is_reduction { + Dst::None + } else { + dst.clone().into() + }, addr: addr, cmpr: 0.into(), data: data, @@ -2741,7 +2745,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32)); b.push_op(OpAtom { - dst: dst.into(), + dst: dst.clone().into(), addr: addr, cmpr: cmpr, data: data, @@ -2847,7 +2851,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); b.push_op(OpLd { - dst: dst.into(), + dst: dst.clone().into(), addr: addr, offset: offset, access: access, @@ -2873,7 +2877,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, 2); b.push_op(OpLdTram { - dst: dst.into(), + dst: dst.clone().into(), addr, use_c, }); @@ -2931,7 +2935,7 @@ impl<'a> ShaderFromNir<'a> { let comps = 2; let dst = b.alloc_ssa_vec(RegFile::GPR, comps); b.push_op(OpALd { - dst: dst.into(), + dst: dst.clone().into(), vtx: vtx.into(), addr: NAK_ATTR_TESS_COORD, offset: 0.into(), @@ -2956,7 +2960,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); b.push_op(OpLd { - dst: dst.into(), + dst: dst.clone().into(), addr: addr, offset: offset, access: access, @@ -2978,7 +2982,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, size_B.div_ceil(4)); b.push_op(OpLd { - dst: dst.into(), + dst: dst.clone().into(), addr: addr, offset: offset, access: access, @@ -2998,13 +3002,13 @@ impl<'a> ShaderFromNir<'a> { { debug_assert!(idx + comps <= NAK_SV_CLOCK + 2); b.push_op(OpCS2R { - dst: dst.into(), + dst: dst.clone().into(), idx: idx, }); } else { debug_assert!(intrin.def.bit_size == 32); b.push_op(OpS2R { - dst: dst.into(), + dst: dst.clone().into(), idx: idx, }); } @@ -3028,11 +3032,14 @@ impl<'a> ShaderFromNir<'a> { if off.is_zero() { for (i, comp) in dst.iter().enumerate() { let i = u16::try_from(i).unwrap(); - b.copy_to((*comp).into(), cb.offset(i * 4).into()); + b.copy_to( + (*comp).into(), + cb.clone().offset(i * 4).into(), + ); } } else { b.push_op(OpLdc { - dst: dst.into(), + dst: dst.clone().into(), cb: cb.into(), offset: off, mode: LdcMode::Indexed, @@ -3055,7 +3062,7 @@ impl<'a> ShaderFromNir<'a> { offset: off_imm, }; b.push_op(OpLdc { - dst: dst.into(), + dst: dst.clone().into(), cb: cb.into(), offset: off_idx.into(), mode: LdcMode::IndexedSegmented, @@ -3080,11 +3087,14 @@ impl<'a> ShaderFromNir<'a> { if off.is_zero() { for (i, comp) in dst.iter().enumerate() { let i = u16::try_from(i).unwrap(); - b.copy_to((*comp).into(), cb.offset(i * 4).into()); + b.copy_to( + (*comp).into(), + cb.clone().offset(i * 4).into(), + ); } } else { b.push_op(OpLdc { - dst: dst.into(), + dst: dst.clone().into(), cb: cb.into(), offset: off, mode: LdcMode::Indexed, @@ -3096,14 +3106,14 @@ impl<'a> ShaderFromNir<'a> { nir_intrinsic_pin_cx_handle_nv => { let handle = self.get_ssa_ref(&srcs[0]); b.push_op(OpPin { - src: handle.into(), + src: handle.clone().into(), dst: handle.into(), }); } nir_intrinsic_unpin_cx_handle_nv => { let handle = self.get_ssa_ref(&srcs[0]); b.push_op(OpUnpin { - src: handle.into(), + src: handle.clone().into(), dst: handle.into(), }); } @@ -3227,7 +3237,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32)); b.push_op(OpAtom { - dst: dst.into(), + dst: dst.clone().into(), addr: addr, cmpr: 0.into(), data: data, @@ -3252,7 +3262,7 @@ impl<'a> ShaderFromNir<'a> { let dst = b.alloc_ssa_vec(RegFile::GPR, bit_size.div_ceil(32)); b.push_op(OpAtom { - dst: dst.into(), + dst: dst.clone().into(), addr: addr, cmpr: cmpr, data: data, @@ -3723,7 +3733,8 @@ impl<'a> ShaderFromNir<'a> { for ps in np.iter_srcs() { if ps.pred().index == nb.index { - let src = *self.get_src(&ps.src).as_ssa().unwrap(); + let src = self.get_src(&ps.src); + let src = src.as_ssa().unwrap(); for (i, src) in src.iter().enumerate() { let phi_id = phi_map.get_phi_id(np, i.try_into().unwrap()); diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs index 4c86fbf6951..c2bd2aa8c1e 100644 --- a/src/nouveau/compiler/nak/hw_tests.rs +++ b/src/nouveau/compiler/nak/hw_tests.rs @@ -150,8 +150,8 @@ impl<'a> TestShaderBuilder<'a> { let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap(); let dst = self.alloc_ssa_vec(RegFile::GPR, comps); self.push_op(OpLd { - dst: dst.into(), - addr: self.data_addr.into(), + dst: dst.clone().into(), + addr: self.data_addr.clone().into(), offset: offset.into(), access: access, }); @@ -173,7 +173,7 @@ impl<'a> TestShaderBuilder<'a> { let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap(); assert!(data.comps() == comps); self.push_op(OpSt { - addr: self.data_addr.into(), + addr: self.data_addr.clone().into(), data: data.into(), offset: offset.into(), access: access, diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 2829ae4aabe..86379c552c2 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -421,7 +421,7 @@ impl fmt::Display for SSAValue { /// designed so that is always 16B, regardless of how many SSA values are /// referenced so it's easy and fairly cheap to copy around and embed in other /// structures. -#[derive(Clone, Copy, Eq, Hash, PartialEq)] +#[derive(Clone, Eq, Hash, PartialEq)] pub struct SSARef { v: [SSAValue; 4], } @@ -743,7 +743,7 @@ impl fmt::Display for Dst { } } -#[derive(Clone, Copy, Eq, Hash, PartialEq)] +#[derive(Clone, Eq, Hash, PartialEq)] pub enum CBuf { Binding(u8), @@ -764,7 +764,7 @@ impl fmt::Display for CBuf { } } -#[derive(Clone, Copy, Eq, Hash, PartialEq)] +#[derive(Clone, Eq, Hash, PartialEq)] pub struct CBufRef { pub buf: CBuf, pub offset: u16, @@ -1245,7 +1245,7 @@ impl Src { } pub fn as_bool(&self) -> Option { - match self.src_ref { + match &self.src_ref { SrcRef::True => Some(!self.src_mod.is_bnot()), SrcRef::False => Some(self.src_mod.is_bnot()), SrcRef::SSA(vec) => { @@ -1298,7 +1298,7 @@ impl Src { } pub fn is_uniform(&self) -> bool { - match self.src_ref { + match &self.src_ref { SrcRef::Zero | SrcRef::True | SrcRef::False @@ -5376,7 +5376,7 @@ pub struct OpLdc { impl DisplayOp for OpLdc { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let SrcRef::CBuf(cb) = self.cb.src_ref else { + let SrcRef::CBuf(cb) = &self.cb.src_ref else { panic!("Not a cbuf"); }; write!(f, "ldc{}{} {}[", self.mode, self.mem_type, cb.buf)?; diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index b5f01f3f17d..a91c8805709 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -26,7 +26,7 @@ pub fn src_is_upred_reg(src: &Src) -> bool { } pub fn src_is_reg(src: &Src, reg_file: RegFile) -> bool { - match src.src_ref { + match &src.src_ref { SrcRef::Zero | SrcRef::True | SrcRef::False => true, SrcRef::SSA(ssa) => ssa.file() == Some(reg_file), SrcRef::Imm32(_) | SrcRef::CBuf(_) => false, @@ -143,10 +143,12 @@ pub trait LegalizeBuildHelpers: SSABuilder { })); } + let old_src_ref = + std::mem::replace(&mut src.src_ref, val.clone().into()); if val.comps() == 1 { - self.copy_to(val.into(), src.src_ref.clone().into()); + self.copy_to(val[0].into(), old_src_ref.into()); } else { - match src.src_ref { + match old_src_ref { SrcRef::Imm32(u) => { // Immediates go in the top bits self.copy_to(val[0].into(), 0.into()); @@ -154,7 +156,7 @@ pub trait LegalizeBuildHelpers: SSABuilder { } SrcRef::CBuf(cb) => { // CBufs load 8B - self.copy_to(val[0].into(), cb.into()); + self.copy_to(val[0].into(), cb.clone().into()); self.copy_to(val[1].into(), cb.offset(4).into()); } SrcRef::SSA(vec) => { @@ -165,8 +167,6 @@ pub trait LegalizeBuildHelpers: SSABuilder { _ => panic!("Invalid 64-bit SrcRef"), } } - - src.src_ref = val.into(); } fn copy_alu_src_if_not_reg( @@ -259,7 +259,7 @@ pub trait LegalizeBuildHelpers: SSABuilder { } SrcType::F64 => { let val = self.alloc_ssa_vec(reg_file, 2); - let old_src = std::mem::replace(src, val.into()); + let old_src = std::mem::replace(src, val.clone().into()); self.push_op(OpDAdd { dst: val.into(), srcs: [Src::ZERO.fneg(), old_src], @@ -451,11 +451,11 @@ fn legalize_instr( // okay. Just make it look the same as the previous source we // fixed up. if let Some(new_vec) = vec_src_map.get(vec) { - src.src_ref = (*new_vec).into(); + src.src_ref = new_vec.clone().into(); continue; } - let mut new_vec = *vec; + let mut new_vec = vec.clone(); for c in 0..vec.comps() { let ssa = vec[usize::from(c)]; // If the same SSA value shows up in multiple non-identical @@ -471,7 +471,7 @@ fn legalize_instr( } } - vec_src_map.insert(*vec, new_vec); + vec_src_map.insert(vec.clone(), new_vec.clone()); src.src_ref = new_vec.into(); } } @@ -492,7 +492,7 @@ impl Shader<'_> { for (ip, mut instr) in b.instrs.drain(..).enumerate() { if let Op::Pin(pin) = &instr.op { if let Dst::SSA(ssa) = &pin.dst { - pinned.insert(*ssa); + pinned.insert(ssa.clone()); } } diff --git a/src/nouveau/compiler/nak/opt_bar_prop.rs b/src/nouveau/compiler/nak/opt_bar_prop.rs index bdb9741ab03..8f623146ca7 100644 --- a/src/nouveau/compiler/nak/opt_bar_prop.rs +++ b/src/nouveau/compiler/nak/opt_bar_prop.rs @@ -219,7 +219,7 @@ impl BarPropPass { let mut bmovs = Vec::new(); for (idx, dst) in op.dsts.iter_mut() { if self.phi_is_bar.get((*idx).try_into().unwrap()) { - let ssa = *dst.as_ssa().unwrap(); + let ssa = dst.as_ssa().unwrap().clone(); let bar = *self.ssa_map.get(&ssa[0]).unwrap(); *dst = bar.into(); diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs index ba399d63165..72675e6bd92 100644 --- a/src/nouveau/compiler/nak/opt_copy_prop.rs +++ b/src/nouveau/compiler/nak/opt_copy_prop.rs @@ -121,7 +121,7 @@ impl CopyPropPass { self.add_copy(bi, dst[1], SrcType::F64, src); } SrcRef::CBuf(cb) => { - let lo32 = Src::from(SrcRef::CBuf(cb)); + let lo32 = Src::from(SrcRef::CBuf(cb.clone())); let hi32 = Src { src_ref: SrcRef::CBuf(cb.offset(4)), src_mod: src.src_mod, @@ -161,7 +161,7 @@ impl CopyPropPass { return; }; - match entry.src.src_ref { + match &entry.src.src_ref { SrcRef::True => { pred.pred_ref = PredRef::None; } @@ -196,7 +196,7 @@ impl CopyPropPass { }; if entry.src.src_mod.is_none() { - if let SrcRef::SSA(entry_ssa) = entry.src.src_ref { + if let SrcRef::SSA(entry_ssa) = &entry.src.src_ref { assert!(entry_ssa.comps() == 1); *c_ssa = entry_ssa[0]; progress = true; @@ -395,7 +395,7 @@ impl CopyPropPass { let lo_entry_or_none = self.get_copy(&src_ssa[0]); if let Some(CopyPropEntry::Copy(lo_entry)) = lo_entry_or_none { if lo_entry.src.src_mod.is_none() { - if let SrcRef::SSA(lo_entry_ssa) = lo_entry.src.src_ref { + if let SrcRef::SSA(lo_entry_ssa) = &lo_entry.src.src_ref { src_ssa[0] = lo_entry_ssa[0]; continue; } @@ -407,7 +407,7 @@ impl CopyPropPass { if hi_entry.src.src_mod.is_none() || hi_entry.src_type == SrcType::F64 { - if let SrcRef::SSA(hi_entry_ssa) = hi_entry.src.src_ref { + if let SrcRef::SSA(hi_entry_ssa) = &hi_entry.src.src_ref { src_ssa[1] = hi_entry_ssa[0]; src.src_mod = hi_entry.src.src_mod.modify(src.src_mod); continue; @@ -439,8 +439,8 @@ impl CopyPropPass { return; } - let new_src_ref = match hi_entry.src.src_ref { - SrcRef::Zero => match lo_entry.src.src_ref { + let new_src_ref = match &hi_entry.src.src_ref { + SrcRef::Zero => match &lo_entry.src.src_ref { SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Zero, _ => return, }, @@ -448,11 +448,11 @@ impl CopyPropPass { // 32-bit immediates for f64 srouces are the top 32 bits // with zero in the lower 32. match lo_entry.src.src_ref { - SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Imm32(i), + SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Imm32(*i), _ => return, } } - SrcRef::CBuf(hi_cb) => match lo_entry.src.src_ref { + SrcRef::CBuf(hi_cb) => match &lo_entry.src.src_ref { SrcRef::CBuf(lo_cb) => { if hi_cb.buf != lo_cb.buf { return; @@ -463,7 +463,7 @@ impl CopyPropPass { if hi_cb.offset != lo_cb.offset + 4 { return; } - SrcRef::CBuf(lo_cb) + SrcRef::CBuf(lo_cb.clone()) } _ => return, }, @@ -591,7 +591,7 @@ impl CopyPropPass { } Op::PLop3(lop) => { for i in 0..2 { - let dst = match lop.dsts[i] { + let dst = match &lop.dsts[i] { Dst::SSA(vec) => { assert!(vec.comps() == 1); vec[0] diff --git a/src/nouveau/compiler/nak/opt_lop.rs b/src/nouveau/compiler/nak/opt_lop.rs index 422ca9242db..0bc3d3dcecc 100644 --- a/src/nouveau/compiler/nak/opt_lop.rs +++ b/src/nouveau/compiler/nak/opt_lop.rs @@ -36,7 +36,7 @@ impl LopPass { } for src in instr.srcs() { - if let SrcRef::SSA(vec) = src.src_ref { + if let SrcRef::SSA(vec) = &src.src_ref { for ssa in vec.iter() { use_counts .entry(*ssa) @@ -105,7 +105,7 @@ impl LopPass { ) { loop { assert!(srcs[src_idx].src_mod.is_none()); - let ssa = match srcs[src_idx].src_ref { + let ssa = match &srcs[src_idx].src_ref { SrcRef::SSA(vec) => { assert!(vec.comps() == 1); vec[0] @@ -213,7 +213,7 @@ impl LopPass { self.try_prop_to_src(slice::from_mut(&mut op.op), &mut op.srcs, i); } - if let Dst::SSA(ssa) = op.dst { + if let Dst::SSA(ssa) = &op.dst { assert!(ssa.comps() == 1); self.add_lop(ssa[0], op.op, op.srcs.clone()); } @@ -246,7 +246,7 @@ impl LopPass { } for i in 0..2 { - if let Dst::SSA(ssa) = op.dsts[i] { + if let Dst::SSA(ssa) = &op.dsts[i] { assert!(ssa.comps() == 1); self.add_lop(ssa[0], op.ops[i], op.srcs.clone()); } diff --git a/src/nouveau/compiler/nak/opt_prmt.rs b/src/nouveau/compiler/nak/opt_prmt.rs index 765ee6ef3e5..1d73ca5374b 100644 --- a/src/nouveau/compiler/nak/opt_prmt.rs +++ b/src/nouveau/compiler/nak/opt_prmt.rs @@ -17,7 +17,7 @@ struct PrmtSrcs { impl PrmtSrcs { fn new() -> PrmtSrcs { PrmtSrcs { - srcs: [const { SrcRef::Zero }; 2], + srcs: [SrcRef::Zero, SrcRef::Zero], num_srcs: 0, imm_src: usize::MAX, num_imm_bytes: 0, @@ -93,7 +93,7 @@ impl PrmtPass { } fn add_prmt(&mut self, op: &OpPrmt) { - let Dst::SSA(dst_ssa) = op.dst else { + let Dst::SSA(dst_ssa) = &op.dst else { return; }; debug_assert!(dst_ssa.comps() == 1); diff --git a/src/nouveau/compiler/nak/sm20.rs b/src/nouveau/compiler/nak/sm20.rs index 3d5c394afb7..35f5ca1354a 100644 --- a/src/nouveau/compiler/nak/sm20.rs +++ b/src/nouveau/compiler/nak/sm20.rs @@ -142,11 +142,11 @@ impl AluSrc { assert!(src.src_swizzle.is_none()); // do not assert src_mod, can be encoded by opcode. - match src.src_ref { + match &src.src_ref { SrcRef::Zero => AluSrc::Reg(zero_reg()), - SrcRef::Reg(r) => AluSrc::Reg(r), - SrcRef::Imm32(x) => AluSrc::Imm(x), - SrcRef::CBuf(x) => AluSrc::CBuf(x), + SrcRef::Reg(r) => AluSrc::Reg(*r), + SrcRef::Imm32(x) => AluSrc::Imm(*x), + SrcRef::CBuf(x) => AluSrc::CBuf(x.clone()), _ => panic!("Unhandled ALU src type"), } } else { @@ -2135,20 +2135,20 @@ fn atom_src_as_ssa( atom_type: AtomType, ) -> SSARef { if let Some(ssa) = src.as_ssa() { - return *ssa; + return ssa.clone(); } - let tmp; if atom_type.bits() == 32 { - tmp = b.alloc_ssa_vec(RegFile::GPR, 1); + let tmp = b.alloc_ssa(RegFile::GPR); b.copy_to(tmp.into(), 0.into()); + tmp.into() } else { debug_assert!(atom_type.bits() == 64); - tmp = b.alloc_ssa_vec(RegFile::GPR, 2); + let tmp = b.alloc_ssa_vec(RegFile::GPR, 2); b.copy_to(tmp[0].into(), 0.into()); b.copy_to(tmp[1].into(), 0.into()); + tmp } - tmp } impl SM20Op for OpAtom { diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index 9b305294632..c3f6b52be08 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -11,7 +11,7 @@ use std::collections::HashMap; use std::ops::Range; pub fn instr_latency(_sm: u8, op: &Op, dst_idx: usize) -> u32 { - let file = match op.dsts_as_slice()[dst_idx] { + let file = match &op.dsts_as_slice()[dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), @@ -2651,20 +2651,20 @@ fn atom_src_as_ssa( atom_type: AtomType, ) -> SSARef { if let Some(ssa) = src.as_ssa() { - return *ssa; + return ssa.clone(); } - let tmp; if atom_type.bits() == 32 { - tmp = b.alloc_ssa_vec(RegFile::GPR, 1); + let tmp = b.alloc_ssa(RegFile::GPR); b.copy_to(tmp.into(), 0.into()); + tmp.into() } else { debug_assert!(atom_type.bits() == 64); - tmp = b.alloc_ssa_vec(RegFile::GPR, 2); + let tmp = b.alloc_ssa_vec(RegFile::GPR, 2); b.copy_to(tmp[0].into(), 0.into()); b.copy_to(tmp[1].into(), 0.into()); + tmp } - tmp } impl SM50Op for OpAtom { diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs index e595ce1bcb8..7321ab1e15d 100644 --- a/src/nouveau/compiler/nak/sm70.rs +++ b/src/nouveau/compiler/nak/sm70.rs @@ -22,7 +22,7 @@ impl ShaderModel70 { } fn instr_latency(&self, op: &Op, dst_idx: usize) -> u32 { - let file = match op.dsts_as_slice()[dst_idx] { + let file = match &op.dsts_as_slice()[dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs index be674d34bef..128b7ea6f09 100644 --- a/src/nouveau/compiler/nak/sm70_encode.rs +++ b/src/nouveau/compiler/nak/sm70_encode.rs @@ -282,7 +282,7 @@ impl ALUSrc { return ALUSrc::None; }; - match src.src_ref { + match &src.src_ref { SrcRef::Zero | SrcRef::Reg(_) => { let reg = match src.src_ref { SrcRef::Zero => { @@ -317,11 +317,11 @@ impl ALUSrc { SrcRef::Imm32(i) => { assert!(src.src_mod.is_none()); assert!(src.src_swizzle.is_none()); - ALUSrc::Imm32(i) + ALUSrc::Imm32(*i) } SrcRef::CBuf(cb) => { let alu_ref = ALUCBufRef { - cb: cb, + cb: cb.clone(), abs: src_mod_has_abs(src.src_mod), neg: src_mod_has_neg(src.src_mod), swizzle: src.src_swizzle, diff --git a/src/nouveau/compiler/nak/sm75_instr_latencies.rs b/src/nouveau/compiler/nak/sm75_instr_latencies.rs index 79f5d5f4b96..7148b857ade 100644 --- a/src/nouveau/compiler/nak/sm75_instr_latencies.rs +++ b/src/nouveau/compiler/nak/sm75_instr_latencies.rs @@ -1170,7 +1170,7 @@ impl SM75Latency { read: Option<&Op>, src_idx: usize, ) -> u32 { - let dst_file = match write.dsts_as_slice()[dst_idx] { + let dst_file = match &write.dsts_as_slice()[dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), @@ -1233,7 +1233,7 @@ impl SM75Latency { } pub fn war(read: &Op, src_idx: usize, write: &Op, dst_idx: usize) -> u32 { - let dst_file = match write.dsts_as_slice()[dst_idx] { + let dst_file = match &write.dsts_as_slice()[dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), @@ -1291,7 +1291,7 @@ impl SM75Latency { b_dst_idx: usize, a_op_pred: bool, ) -> u32 { - let dst_file = match a.dsts_as_slice()[a_dst_idx] { + let dst_file = match &a.dsts_as_slice()[a_dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), diff --git a/src/nouveau/compiler/nak/sm80_instr_latencies.rs b/src/nouveau/compiler/nak/sm80_instr_latencies.rs index 068e93b287b..7feb9d66046 100644 --- a/src/nouveau/compiler/nak/sm80_instr_latencies.rs +++ b/src/nouveau/compiler/nak/sm80_instr_latencies.rs @@ -1406,7 +1406,7 @@ impl SM80Latency { read: Option<&Op>, src_idx: usize, ) -> u32 { - let dst_file = match write.dsts_as_slice()[dst_idx] { + let dst_file = match &write.dsts_as_slice()[dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), @@ -1467,7 +1467,7 @@ impl SM80Latency { } pub fn war(read: &Op, src_idx: usize, write: &Op, dst_idx: usize) -> u32 { - let dst_file = match write.dsts_as_slice()[dst_idx] { + let dst_file = match &write.dsts_as_slice()[dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), @@ -1521,7 +1521,7 @@ impl SM80Latency { b_dst_idx: usize, a_op_pred: bool, ) -> u32 { - let dst_file = match a.dsts_as_slice()[a_dst_idx] { + let dst_file = match &a.dsts_as_slice()[a_dst_idx] { Dst::None => return 0, Dst::SSA(vec) => vec.file().unwrap(), Dst::Reg(reg) => reg.file(), diff --git a/src/nouveau/compiler/nak/to_cssa.rs b/src/nouveau/compiler/nak/to_cssa.rs index 262f98c664b..b6e3e7f2cf4 100644 --- a/src/nouveau/compiler/nak/to_cssa.rs +++ b/src/nouveau/compiler/nak/to_cssa.rs @@ -301,7 +301,7 @@ impl Function { if let Some(phi) = b.phi_srcs() { for (idx, src) in phi.srcs.iter() { - if let SrcRef::SSA(vec) = src.src_ref { + if let SrcRef::SSA(vec) = &src.src_ref { debug_assert!(vec.comps() == 1); cg.add_ssa(vec[0]); }