nak: Place most Op structs in Box<>

Between this and the previous few commits, the Box<> has moved from
outside of Instr to inside the Op enum. This provides a few benefits:

 1. We no longer need to allocate for the worst-case Op on every
    instruction. For example, OpIAdd3X is 232 bytes and OpBra is 40
    bytes, which means we can save some memory on OpBra if we only
    allocate those 40 bytes.

 2. The Op discriminant is available without chasing a pointer. The type
    of op is probably the most frequently used field, and this should
    benefit most passes that care about what type of Op they're
    handling.

 3. Small Ops don't need any indirection at all. For example, OpPBk is
    only 4 bytes, which means we can just store it directly in less
    space than a pointer.

Compared to Box<Instr>, this is around a 1.4% shader compile time
improvement.

Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Seán de Búrca <leftmostcat@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37315>
This commit is contained in:
Mel Henning 2025-09-11 00:36:38 -04:00 committed by Marge Bot
parent 9257f5607f
commit 3c32ff7fa9
7 changed files with 165 additions and 153 deletions

View file

@ -1134,9 +1134,15 @@ impl AssignRegsBlock {
Some(instr)
}
}
Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => {
Op::Pin(_) | Op::Unpin(_) => {
assert!(instr.pred.is_true());
let (src, dst) = match &instr.op {
Op::Pin(pin) => (&pin.src, &pin.dst),
Op::Unpin(unpin) => (&unpin.src, &unpin.dst),
_ => unreachable!(),
};
// These basically act as a vector version of OpCopy except that
// they only work on SSA values and we pin the destination if
// it's OpPin.

View file

@ -3969,10 +3969,13 @@ impl<'a> ShaderFromNir<'a> {
Op::Exit(OpExit {})
} else {
self.cfg.add_edge(nb.index, target.index);
Op::Bra(OpBra {
target: self.get_block_label(target),
cond: true.into(),
})
Op::Bra(
OpBra {
target: self.get_block_label(target),
cond: true.into(),
}
.into(),
)
};
b.predicate(pred).push_op(op);
}

View file

@ -7839,101 +7839,101 @@ impl fmt::Display for OpAnnotate {
#[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
pub enum Op {
FAdd(OpFAdd),
FFma(OpFFma),
FMnMx(OpFMnMx),
FMul(OpFMul),
Rro(OpRro),
MuFu(OpMuFu),
FSet(OpFSet),
FSetP(OpFSetP),
FSwzAdd(OpFSwzAdd),
FSwz(OpFSwz),
DAdd(OpDAdd),
DFma(OpDFma),
DMnMx(OpDMnMx),
DMul(OpDMul),
DSetP(OpDSetP),
HAdd2(OpHAdd2),
HFma2(OpHFma2),
HMul2(OpHMul2),
HSet2(OpHSet2),
HSetP2(OpHSetP2),
Imma(OpImma),
Hmma(OpHmma),
Ldsm(OpLdsm),
HMnMx2(OpHMnMx2),
BMsk(OpBMsk),
BRev(OpBRev),
Bfe(OpBfe),
Flo(OpFlo),
IAbs(OpIAbs),
IAdd2(OpIAdd2),
IAdd2X(OpIAdd2X),
IAdd3(OpIAdd3),
IAdd3X(OpIAdd3X),
IDp4(OpIDp4),
IMad(OpIMad),
IMad64(OpIMad64),
IMul(OpIMul),
IMnMx(OpIMnMx),
ISetP(OpISetP),
Lea(OpLea),
LeaX(OpLeaX),
Lop2(OpLop2),
Lop3(OpLop3),
PopC(OpPopC),
Shf(OpShf),
Shl(OpShl),
Shr(OpShr),
F2F(OpF2F),
F2FP(OpF2FP),
F2I(OpF2I),
I2F(OpI2F),
I2I(OpI2I),
FRnd(OpFRnd),
Mov(OpMov),
Prmt(OpPrmt),
Sel(OpSel),
Shfl(OpShfl),
PLop3(OpPLop3),
PSetP(OpPSetP),
R2UR(OpR2UR),
Redux(OpRedux),
Tex(OpTex),
Tld(OpTld),
Tld4(OpTld4),
Tmml(OpTmml),
Txd(OpTxd),
Txq(OpTxq),
SuLd(OpSuLd),
SuSt(OpSuSt),
SuAtom(OpSuAtom),
SuClamp(OpSuClamp),
SuBfm(OpSuBfm),
SuEau(OpSuEau),
IMadSp(OpIMadSp),
SuLdGa(OpSuLdGa),
SuStGa(OpSuStGa),
Ld(OpLd),
Ldc(OpLdc),
LdSharedLock(OpLdSharedLock),
St(OpSt),
StSCheckUnlock(OpStSCheckUnlock),
Atom(OpAtom),
AL2P(OpAL2P),
ALd(OpALd),
ASt(OpASt),
Ipa(OpIpa),
LdTram(OpLdTram),
CCtl(OpCCtl),
MemBar(OpMemBar),
BClear(OpBClear),
BMov(OpBMov),
Break(OpBreak),
BSSy(OpBSSy),
BSync(OpBSync),
Bra(OpBra),
FAdd(Box<OpFAdd>),
FFma(Box<OpFFma>),
FMnMx(Box<OpFMnMx>),
FMul(Box<OpFMul>),
Rro(Box<OpRro>),
MuFu(Box<OpMuFu>),
FSet(Box<OpFSet>),
FSetP(Box<OpFSetP>),
FSwzAdd(Box<OpFSwzAdd>),
FSwz(Box<OpFSwz>),
DAdd(Box<OpDAdd>),
DFma(Box<OpDFma>),
DMnMx(Box<OpDMnMx>),
DMul(Box<OpDMul>),
DSetP(Box<OpDSetP>),
HAdd2(Box<OpHAdd2>),
HFma2(Box<OpHFma2>),
HMul2(Box<OpHMul2>),
HSet2(Box<OpHSet2>),
HSetP2(Box<OpHSetP2>),
Imma(Box<OpImma>),
Hmma(Box<OpHmma>),
Ldsm(Box<OpLdsm>),
HMnMx2(Box<OpHMnMx2>),
BMsk(Box<OpBMsk>),
BRev(Box<OpBRev>),
Bfe(Box<OpBfe>),
Flo(Box<OpFlo>),
IAbs(Box<OpIAbs>),
IAdd2(Box<OpIAdd2>),
IAdd2X(Box<OpIAdd2X>),
IAdd3(Box<OpIAdd3>),
IAdd3X(Box<OpIAdd3X>),
IDp4(Box<OpIDp4>),
IMad(Box<OpIMad>),
IMad64(Box<OpIMad64>),
IMul(Box<OpIMul>),
IMnMx(Box<OpIMnMx>),
ISetP(Box<OpISetP>),
Lea(Box<OpLea>),
LeaX(Box<OpLeaX>),
Lop2(Box<OpLop2>),
Lop3(Box<OpLop3>),
PopC(Box<OpPopC>),
Shf(Box<OpShf>),
Shl(Box<OpShl>),
Shr(Box<OpShr>),
F2F(Box<OpF2F>),
F2FP(Box<OpF2FP>),
F2I(Box<OpF2I>),
I2F(Box<OpI2F>),
I2I(Box<OpI2I>),
FRnd(Box<OpFRnd>),
Mov(Box<OpMov>),
Prmt(Box<OpPrmt>),
Sel(Box<OpSel>),
Shfl(Box<OpShfl>),
PLop3(Box<OpPLop3>),
PSetP(Box<OpPSetP>),
R2UR(Box<OpR2UR>),
Redux(Box<OpRedux>),
Tex(Box<OpTex>),
Tld(Box<OpTld>),
Tld4(Box<OpTld4>),
Tmml(Box<OpTmml>),
Txd(Box<OpTxd>),
Txq(Box<OpTxq>),
SuLd(Box<OpSuLd>),
SuSt(Box<OpSuSt>),
SuAtom(Box<OpSuAtom>),
SuClamp(Box<OpSuClamp>),
SuBfm(Box<OpSuBfm>),
SuEau(Box<OpSuEau>),
IMadSp(Box<OpIMadSp>),
SuLdGa(Box<OpSuLdGa>),
SuStGa(Box<OpSuStGa>),
Ld(Box<OpLd>),
Ldc(Box<OpLdc>),
LdSharedLock(Box<OpLdSharedLock>),
St(Box<OpSt>),
StSCheckUnlock(Box<OpStSCheckUnlock>),
Atom(Box<OpAtom>),
AL2P(Box<OpAL2P>),
ALd(Box<OpALd>),
ASt(Box<OpASt>),
Ipa(Box<OpIpa>),
LdTram(Box<OpLdTram>),
CCtl(Box<OpCCtl>),
MemBar(Box<OpMemBar>),
BClear(Box<OpBClear>),
BMov(Box<OpBMov>),
Break(Box<OpBreak>),
BSSy(Box<OpBSSy>),
BSync(Box<OpBSync>),
Bra(Box<OpBra>),
SSy(OpSSy),
Sync(OpSync),
Brk(OpBrk),
@ -7941,34 +7941,39 @@ pub enum Op {
Cont(OpCont),
PCnt(OpPCnt),
Exit(OpExit),
WarpSync(OpWarpSync),
Bar(OpBar),
TexDepBar(OpTexDepBar),
CS2R(OpCS2R),
Isberd(OpIsberd),
ViLd(OpViLd),
Kill(OpKill),
WarpSync(Box<OpWarpSync>),
Bar(Box<OpBar>),
TexDepBar(Box<OpTexDepBar>),
CS2R(Box<OpCS2R>),
Isberd(Box<OpIsberd>),
ViLd(Box<OpViLd>),
Kill(Box<OpKill>),
Nop(OpNop),
PixLd(OpPixLd),
S2R(OpS2R),
Vote(OpVote),
Match(OpMatch),
Undef(OpUndef),
SrcBar(OpSrcBar),
PhiSrcs(OpPhiSrcs),
PhiDsts(OpPhiDsts),
Copy(OpCopy),
Pin(OpPin),
Unpin(OpUnpin),
Swap(OpSwap),
ParCopy(OpParCopy),
RegOut(OpRegOut),
Out(OpOut),
OutFinal(OpOutFinal),
Annotate(OpAnnotate),
PixLd(Box<OpPixLd>),
S2R(Box<OpS2R>),
Vote(Box<OpVote>),
Match(Box<OpMatch>),
Undef(Box<OpUndef>),
SrcBar(Box<OpSrcBar>),
PhiSrcs(Box<OpPhiSrcs>),
PhiDsts(Box<OpPhiDsts>),
Copy(Box<OpCopy>),
Pin(Box<OpPin>),
Unpin(Box<OpUnpin>),
Swap(Box<OpSwap>),
ParCopy(Box<OpParCopy>),
RegOut(Box<OpRegOut>),
Out(Box<OpOut>),
OutFinal(Box<OpOutFinal>),
Annotate(Box<OpAnnotate>),
}
impl_display_for_op!(Op);
#[cfg(target_arch = "x86_64")]
const _: () = {
debug_assert!(size_of::<Op>() == 16);
};
impl Op {
pub fn is_branch(&self) -> bool {
match self {
@ -8620,7 +8625,7 @@ impl BasicBlock {
pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
self.phi_dsts_ip().map(|ip| match &self.instrs[ip].op {
Op::PhiDsts(phi) => phi,
Op::PhiDsts(phi) => phi.deref(),
_ => panic!("Expected to find the phi"),
})
}
@ -8628,7 +8633,7 @@ impl BasicBlock {
#[allow(dead_code)]
pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
self.phi_dsts_ip().map(|ip| match &mut self.instrs[ip].op {
Op::PhiDsts(phi) => phi,
Op::PhiDsts(phi) => phi.deref_mut(),
_ => panic!("Expected to find the phi"),
})
}
@ -8646,14 +8651,14 @@ impl BasicBlock {
}
pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
self.phi_srcs_ip().map(|ip| match &self.instrs[ip].op {
Op::PhiSrcs(phi) => phi,
Op::PhiSrcs(phi) => phi.deref(),
_ => panic!("Expected to find the phi"),
})
}
pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
self.phi_srcs_ip().map(|ip| match &mut self.instrs[ip].op {
Op::PhiSrcs(phi) => phi,
Op::PhiSrcs(phi) => phi.deref_mut(),
_ => panic!("Expected to find the phi"),
})
}

View file

@ -459,20 +459,17 @@ fn legalize_instr(
}
// OpBreak and OpBSsy impose additional RA constraints
match &mut instr.op {
Op::Break(OpBreak {
bar_in, bar_out, ..
})
| Op::BSSy(OpBSSy {
bar_in, bar_out, ..
}) => {
let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap();
if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) {
let gpr = b.bmov_to_gpr(bar_in.clone());
let tmp = b.bmov_to_bar(gpr.into());
*bar_in = tmp.into();
}
let mut legalize_break_bssy = |bar_in: &mut Src, bar_out: &mut Dst| {
let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap();
if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) {
let gpr = b.bmov_to_gpr(bar_in.clone());
let tmp = b.bmov_to_bar(gpr.into());
*bar_in = tmp.into();
}
};
match &mut instr.op {
Op::Break(op) => legalize_break_bssy(&mut op.bar_in, &mut op.bar_out),
Op::BSSy(op) => legalize_break_bssy(&mut op.bar_in, &mut op.bar_out),
_ => (),
}

View file

@ -260,7 +260,7 @@ impl LowerCopySwap {
.into(),
}));
}
self.lower_r2ur(&mut b, r2ur);
self.lower_r2ur(&mut b, *r2ur);
b.into_mapped_instrs()
}
Op::Copy(copy) => {
@ -272,7 +272,7 @@ impl LowerCopySwap {
.into(),
}));
}
self.lower_copy(&mut b, copy);
self.lower_copy(&mut b, *copy);
b.into_mapped_instrs()
}
Op::Swap(swap) => {
@ -284,7 +284,7 @@ impl LowerCopySwap {
.into(),
}));
}
self.lower_swap(&mut b, swap);
self.lower_swap(&mut b, *swap);
b.into_mapped_instrs()
}
_ => MappedInstrs::One(instr),

View file

@ -265,7 +265,7 @@ impl Shader<'_> {
.into(),
}));
}
match lower_par_copy(pc, sm) {
match lower_par_copy(*pc, sm) {
MappedInstrs::None => {
if let Some(instr) = instrs.pop() {
MappedInstrs::One(instr)

View file

@ -87,10 +87,13 @@ fn jump_thread(func: &mut Function) -> bool {
.get(&target_label)
.map(clone_branch)
.unwrap_or_else(|| {
Op::Bra(OpBra {
target: target_label,
cond: true.into(),
})
Op::Bra(
OpBra {
target: target_label,
cond: true.into(),
}
.into(),
)
});
replacements.insert(block_label, replacement);
}
@ -139,10 +142,8 @@ fn rewrite_cfg(func: &mut Function) {
fn opt_fall_through(func: &mut Function) {
for i in 0..func.blocks.len() - 1 {
let remove_last_instr = match func.blocks[i].branch() {
Some(b) => match b.op {
Op::Bra(OpBra { target, .. }) => {
target == func.blocks[i + 1].label
}
Some(b) => match &b.op {
Op::Bra(bra) => bra.target == func.blocks[i + 1].label,
_ => false,
},
None => false,