diff --git a/src/nouveau/compiler/nak/assign_regs.rs b/src/nouveau/compiler/nak/assign_regs.rs index 1bef9a56b02..42d5c99ebbc 100644 --- a/src/nouveau/compiler/nak/assign_regs.rs +++ b/src/nouveau/compiler/nak/assign_regs.rs @@ -1134,9 +1134,15 @@ impl AssignRegsBlock { Some(instr) } } - Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => { + Op::Pin(_) | Op::Unpin(_) => { assert!(instr.pred.is_true()); + let (src, dst) = match &instr.op { + Op::Pin(pin) => (&pin.src, &pin.dst), + Op::Unpin(unpin) => (&unpin.src, &unpin.dst), + _ => unreachable!(), + }; + // These basically act as a vector version of OpCopy except that // they only work on SSA values and we pin the destination if // it's OpPin. diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index fb0f8d76218..a07cca69fc9 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -3969,10 +3969,13 @@ impl<'a> ShaderFromNir<'a> { Op::Exit(OpExit {}) } else { self.cfg.add_edge(nb.index, target.index); - Op::Bra(OpBra { - target: self.get_block_label(target), - cond: true.into(), - }) + Op::Bra( + OpBra { + target: self.get_block_label(target), + cond: true.into(), + } + .into(), + ) }; b.predicate(pred).push_op(op); } diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 667eb7bbae3..5f98a3d6634 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -7839,101 +7839,101 @@ impl fmt::Display for OpAnnotate { #[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)] pub enum Op { - FAdd(OpFAdd), - FFma(OpFFma), - FMnMx(OpFMnMx), - FMul(OpFMul), - Rro(OpRro), - MuFu(OpMuFu), - FSet(OpFSet), - FSetP(OpFSetP), - FSwzAdd(OpFSwzAdd), - FSwz(OpFSwz), - DAdd(OpDAdd), - DFma(OpDFma), - DMnMx(OpDMnMx), - DMul(OpDMul), - DSetP(OpDSetP), - HAdd2(OpHAdd2), - HFma2(OpHFma2), - HMul2(OpHMul2), - HSet2(OpHSet2), - HSetP2(OpHSetP2), - Imma(OpImma), - Hmma(OpHmma), - Ldsm(OpLdsm), - HMnMx2(OpHMnMx2), - BMsk(OpBMsk), - BRev(OpBRev), - Bfe(OpBfe), - Flo(OpFlo), - IAbs(OpIAbs), - IAdd2(OpIAdd2), - IAdd2X(OpIAdd2X), - IAdd3(OpIAdd3), - IAdd3X(OpIAdd3X), - IDp4(OpIDp4), - IMad(OpIMad), - IMad64(OpIMad64), - IMul(OpIMul), - IMnMx(OpIMnMx), - ISetP(OpISetP), - Lea(OpLea), - LeaX(OpLeaX), - Lop2(OpLop2), - Lop3(OpLop3), - PopC(OpPopC), - Shf(OpShf), - Shl(OpShl), - Shr(OpShr), - F2F(OpF2F), - F2FP(OpF2FP), - F2I(OpF2I), - I2F(OpI2F), - I2I(OpI2I), - FRnd(OpFRnd), - Mov(OpMov), - Prmt(OpPrmt), - Sel(OpSel), - Shfl(OpShfl), - PLop3(OpPLop3), - PSetP(OpPSetP), - R2UR(OpR2UR), - Redux(OpRedux), - Tex(OpTex), - Tld(OpTld), - Tld4(OpTld4), - Tmml(OpTmml), - Txd(OpTxd), - Txq(OpTxq), - SuLd(OpSuLd), - SuSt(OpSuSt), - SuAtom(OpSuAtom), - SuClamp(OpSuClamp), - SuBfm(OpSuBfm), - SuEau(OpSuEau), - IMadSp(OpIMadSp), - SuLdGa(OpSuLdGa), - SuStGa(OpSuStGa), - Ld(OpLd), - Ldc(OpLdc), - LdSharedLock(OpLdSharedLock), - St(OpSt), - StSCheckUnlock(OpStSCheckUnlock), - Atom(OpAtom), - AL2P(OpAL2P), - ALd(OpALd), - ASt(OpASt), - Ipa(OpIpa), - LdTram(OpLdTram), - CCtl(OpCCtl), - MemBar(OpMemBar), - BClear(OpBClear), - BMov(OpBMov), - Break(OpBreak), - BSSy(OpBSSy), - BSync(OpBSync), - Bra(OpBra), + FAdd(Box), + FFma(Box), + FMnMx(Box), + FMul(Box), + Rro(Box), + MuFu(Box), + FSet(Box), + FSetP(Box), + FSwzAdd(Box), + FSwz(Box), + DAdd(Box), + DFma(Box), + DMnMx(Box), + DMul(Box), + DSetP(Box), + HAdd2(Box), + HFma2(Box), + HMul2(Box), + HSet2(Box), + HSetP2(Box), + Imma(Box), + Hmma(Box), + Ldsm(Box), + HMnMx2(Box), + BMsk(Box), + BRev(Box), + Bfe(Box), + Flo(Box), + IAbs(Box), + IAdd2(Box), + IAdd2X(Box), + IAdd3(Box), + IAdd3X(Box), + IDp4(Box), + IMad(Box), + IMad64(Box), + IMul(Box), + IMnMx(Box), + ISetP(Box), + Lea(Box), + LeaX(Box), + Lop2(Box), + Lop3(Box), + PopC(Box), + Shf(Box), + Shl(Box), + Shr(Box), + F2F(Box), + F2FP(Box), + F2I(Box), + I2F(Box), + I2I(Box), + FRnd(Box), + Mov(Box), + Prmt(Box), + Sel(Box), + Shfl(Box), + PLop3(Box), + PSetP(Box), + R2UR(Box), + Redux(Box), + Tex(Box), + Tld(Box), + Tld4(Box), + Tmml(Box), + Txd(Box), + Txq(Box), + SuLd(Box), + SuSt(Box), + SuAtom(Box), + SuClamp(Box), + SuBfm(Box), + SuEau(Box), + IMadSp(Box), + SuLdGa(Box), + SuStGa(Box), + Ld(Box), + Ldc(Box), + LdSharedLock(Box), + St(Box), + StSCheckUnlock(Box), + Atom(Box), + AL2P(Box), + ALd(Box), + ASt(Box), + Ipa(Box), + LdTram(Box), + CCtl(Box), + MemBar(Box), + BClear(Box), + BMov(Box), + Break(Box), + BSSy(Box), + BSync(Box), + Bra(Box), SSy(OpSSy), Sync(OpSync), Brk(OpBrk), @@ -7941,34 +7941,39 @@ pub enum Op { Cont(OpCont), PCnt(OpPCnt), Exit(OpExit), - WarpSync(OpWarpSync), - Bar(OpBar), - TexDepBar(OpTexDepBar), - CS2R(OpCS2R), - Isberd(OpIsberd), - ViLd(OpViLd), - Kill(OpKill), + WarpSync(Box), + Bar(Box), + TexDepBar(Box), + CS2R(Box), + Isberd(Box), + ViLd(Box), + Kill(Box), Nop(OpNop), - PixLd(OpPixLd), - S2R(OpS2R), - Vote(OpVote), - Match(OpMatch), - Undef(OpUndef), - SrcBar(OpSrcBar), - PhiSrcs(OpPhiSrcs), - PhiDsts(OpPhiDsts), - Copy(OpCopy), - Pin(OpPin), - Unpin(OpUnpin), - Swap(OpSwap), - ParCopy(OpParCopy), - RegOut(OpRegOut), - Out(OpOut), - OutFinal(OpOutFinal), - Annotate(OpAnnotate), + PixLd(Box), + S2R(Box), + Vote(Box), + Match(Box), + Undef(Box), + SrcBar(Box), + PhiSrcs(Box), + PhiDsts(Box), + Copy(Box), + Pin(Box), + Unpin(Box), + Swap(Box), + ParCopy(Box), + RegOut(Box), + Out(Box), + OutFinal(Box), + Annotate(Box), } impl_display_for_op!(Op); +#[cfg(target_arch = "x86_64")] +const _: () = { + debug_assert!(size_of::() == 16); +}; + impl Op { pub fn is_branch(&self) -> bool { match self { @@ -8620,7 +8625,7 @@ impl BasicBlock { pub fn phi_dsts(&self) -> Option<&OpPhiDsts> { self.phi_dsts_ip().map(|ip| match &self.instrs[ip].op { - Op::PhiDsts(phi) => phi, + Op::PhiDsts(phi) => phi.deref(), _ => panic!("Expected to find the phi"), }) } @@ -8628,7 +8633,7 @@ impl BasicBlock { #[allow(dead_code)] pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> { self.phi_dsts_ip().map(|ip| match &mut self.instrs[ip].op { - Op::PhiDsts(phi) => phi, + Op::PhiDsts(phi) => phi.deref_mut(), _ => panic!("Expected to find the phi"), }) } @@ -8646,14 +8651,14 @@ impl BasicBlock { } pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> { self.phi_srcs_ip().map(|ip| match &self.instrs[ip].op { - Op::PhiSrcs(phi) => phi, + Op::PhiSrcs(phi) => phi.deref(), _ => panic!("Expected to find the phi"), }) } pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> { self.phi_srcs_ip().map(|ip| match &mut self.instrs[ip].op { - Op::PhiSrcs(phi) => phi, + Op::PhiSrcs(phi) => phi.deref_mut(), _ => panic!("Expected to find the phi"), }) } diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index 64a017b3133..98a4153138b 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -459,20 +459,17 @@ fn legalize_instr( } // OpBreak and OpBSsy impose additional RA constraints - match &mut instr.op { - Op::Break(OpBreak { - bar_in, bar_out, .. - }) - | Op::BSSy(OpBSSy { - bar_in, bar_out, .. - }) => { - let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap(); - if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) { - let gpr = b.bmov_to_gpr(bar_in.clone()); - let tmp = b.bmov_to_bar(gpr.into()); - *bar_in = tmp.into(); - } + let mut legalize_break_bssy = |bar_in: &mut Src, bar_out: &mut Dst| { + let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap(); + if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) { + let gpr = b.bmov_to_gpr(bar_in.clone()); + let tmp = b.bmov_to_bar(gpr.into()); + *bar_in = tmp.into(); } + }; + match &mut instr.op { + Op::Break(op) => legalize_break_bssy(&mut op.bar_in, &mut op.bar_out), + Op::BSSy(op) => legalize_break_bssy(&mut op.bar_in, &mut op.bar_out), _ => (), } diff --git a/src/nouveau/compiler/nak/lower_copy_swap.rs b/src/nouveau/compiler/nak/lower_copy_swap.rs index 00272b0f417..c4e243e8223 100644 --- a/src/nouveau/compiler/nak/lower_copy_swap.rs +++ b/src/nouveau/compiler/nak/lower_copy_swap.rs @@ -260,7 +260,7 @@ impl LowerCopySwap { .into(), })); } - self.lower_r2ur(&mut b, r2ur); + self.lower_r2ur(&mut b, *r2ur); b.into_mapped_instrs() } Op::Copy(copy) => { @@ -272,7 +272,7 @@ impl LowerCopySwap { .into(), })); } - self.lower_copy(&mut b, copy); + self.lower_copy(&mut b, *copy); b.into_mapped_instrs() } Op::Swap(swap) => { @@ -284,7 +284,7 @@ impl LowerCopySwap { .into(), })); } - self.lower_swap(&mut b, swap); + self.lower_swap(&mut b, *swap); b.into_mapped_instrs() } _ => MappedInstrs::One(instr), diff --git a/src/nouveau/compiler/nak/lower_par_copies.rs b/src/nouveau/compiler/nak/lower_par_copies.rs index e7562871259..fc16adaf56a 100644 --- a/src/nouveau/compiler/nak/lower_par_copies.rs +++ b/src/nouveau/compiler/nak/lower_par_copies.rs @@ -265,7 +265,7 @@ impl Shader<'_> { .into(), })); } - match lower_par_copy(pc, sm) { + match lower_par_copy(*pc, sm) { MappedInstrs::None => { if let Some(instr) = instrs.pop() { MappedInstrs::One(instr) diff --git a/src/nouveau/compiler/nak/opt_jump_thread.rs b/src/nouveau/compiler/nak/opt_jump_thread.rs index 8a114a18051..1b734258ea8 100644 --- a/src/nouveau/compiler/nak/opt_jump_thread.rs +++ b/src/nouveau/compiler/nak/opt_jump_thread.rs @@ -87,10 +87,13 @@ fn jump_thread(func: &mut Function) -> bool { .get(&target_label) .map(clone_branch) .unwrap_or_else(|| { - Op::Bra(OpBra { - target: target_label, - cond: true.into(), - }) + Op::Bra( + OpBra { + target: target_label, + cond: true.into(), + } + .into(), + ) }); replacements.insert(block_label, replacement); } @@ -139,10 +142,8 @@ fn rewrite_cfg(func: &mut Function) { fn opt_fall_through(func: &mut Function) { for i in 0..func.blocks.len() - 1 { let remove_last_instr = match func.blocks[i].branch() { - Some(b) => match b.op { - Op::Bra(OpBra { target, .. }) => { - target == func.blocks[i + 1].label - } + Some(b) => match &b.op { + Op::Bra(bra) => bra.target == func.blocks[i + 1].label, _ => false, }, None => false,