mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 14:40:10 +01:00
nak: Place most Op structs in Box<>
Between this and the previous few commits, the Box<> has moved from
outside of Instr to inside the Op enum. This provides a few benefits:
1. We no longer need to allocate for the worst-case Op on every
instruction. For example, OpIAdd3X is 232 bytes and OpBra is 40
bytes, which means we can save some memory on OpBra if we only
allocate those 40 bytes.
2. The Op discriminant is available without chasing a pointer. The type
of op is probably the most frequently used field, and this should
benefit most passes that care about what type of Op they're
handling.
3. Small Ops don't need any indirection at all. For example, OpPBk is
only 4 bytes, which means we can just store it directly in less
space than a pointer.
Compared to Box<Instr>, this is around a 1.4% shader compile time
improvement.
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Seán de Búrca <leftmostcat@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37315>
This commit is contained in:
parent
9257f5607f
commit
3c32ff7fa9
7 changed files with 165 additions and 153 deletions
|
|
@ -1134,9 +1134,15 @@ impl AssignRegsBlock {
|
|||
Some(instr)
|
||||
}
|
||||
}
|
||||
Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => {
|
||||
Op::Pin(_) | Op::Unpin(_) => {
|
||||
assert!(instr.pred.is_true());
|
||||
|
||||
let (src, dst) = match &instr.op {
|
||||
Op::Pin(pin) => (&pin.src, &pin.dst),
|
||||
Op::Unpin(unpin) => (&unpin.src, &unpin.dst),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// These basically act as a vector version of OpCopy except that
|
||||
// they only work on SSA values and we pin the destination if
|
||||
// it's OpPin.
|
||||
|
|
|
|||
|
|
@ -3969,10 +3969,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
Op::Exit(OpExit {})
|
||||
} else {
|
||||
self.cfg.add_edge(nb.index, target.index);
|
||||
Op::Bra(OpBra {
|
||||
target: self.get_block_label(target),
|
||||
cond: true.into(),
|
||||
})
|
||||
Op::Bra(
|
||||
OpBra {
|
||||
target: self.get_block_label(target),
|
||||
cond: true.into(),
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
};
|
||||
b.predicate(pred).push_op(op);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7839,101 +7839,101 @@ impl fmt::Display for OpAnnotate {
|
|||
|
||||
#[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
|
||||
pub enum Op {
|
||||
FAdd(OpFAdd),
|
||||
FFma(OpFFma),
|
||||
FMnMx(OpFMnMx),
|
||||
FMul(OpFMul),
|
||||
Rro(OpRro),
|
||||
MuFu(OpMuFu),
|
||||
FSet(OpFSet),
|
||||
FSetP(OpFSetP),
|
||||
FSwzAdd(OpFSwzAdd),
|
||||
FSwz(OpFSwz),
|
||||
DAdd(OpDAdd),
|
||||
DFma(OpDFma),
|
||||
DMnMx(OpDMnMx),
|
||||
DMul(OpDMul),
|
||||
DSetP(OpDSetP),
|
||||
HAdd2(OpHAdd2),
|
||||
HFma2(OpHFma2),
|
||||
HMul2(OpHMul2),
|
||||
HSet2(OpHSet2),
|
||||
HSetP2(OpHSetP2),
|
||||
Imma(OpImma),
|
||||
Hmma(OpHmma),
|
||||
Ldsm(OpLdsm),
|
||||
HMnMx2(OpHMnMx2),
|
||||
BMsk(OpBMsk),
|
||||
BRev(OpBRev),
|
||||
Bfe(OpBfe),
|
||||
Flo(OpFlo),
|
||||
IAbs(OpIAbs),
|
||||
IAdd2(OpIAdd2),
|
||||
IAdd2X(OpIAdd2X),
|
||||
IAdd3(OpIAdd3),
|
||||
IAdd3X(OpIAdd3X),
|
||||
IDp4(OpIDp4),
|
||||
IMad(OpIMad),
|
||||
IMad64(OpIMad64),
|
||||
IMul(OpIMul),
|
||||
IMnMx(OpIMnMx),
|
||||
ISetP(OpISetP),
|
||||
Lea(OpLea),
|
||||
LeaX(OpLeaX),
|
||||
Lop2(OpLop2),
|
||||
Lop3(OpLop3),
|
||||
PopC(OpPopC),
|
||||
Shf(OpShf),
|
||||
Shl(OpShl),
|
||||
Shr(OpShr),
|
||||
F2F(OpF2F),
|
||||
F2FP(OpF2FP),
|
||||
F2I(OpF2I),
|
||||
I2F(OpI2F),
|
||||
I2I(OpI2I),
|
||||
FRnd(OpFRnd),
|
||||
Mov(OpMov),
|
||||
Prmt(OpPrmt),
|
||||
Sel(OpSel),
|
||||
Shfl(OpShfl),
|
||||
PLop3(OpPLop3),
|
||||
PSetP(OpPSetP),
|
||||
R2UR(OpR2UR),
|
||||
Redux(OpRedux),
|
||||
Tex(OpTex),
|
||||
Tld(OpTld),
|
||||
Tld4(OpTld4),
|
||||
Tmml(OpTmml),
|
||||
Txd(OpTxd),
|
||||
Txq(OpTxq),
|
||||
SuLd(OpSuLd),
|
||||
SuSt(OpSuSt),
|
||||
SuAtom(OpSuAtom),
|
||||
SuClamp(OpSuClamp),
|
||||
SuBfm(OpSuBfm),
|
||||
SuEau(OpSuEau),
|
||||
IMadSp(OpIMadSp),
|
||||
SuLdGa(OpSuLdGa),
|
||||
SuStGa(OpSuStGa),
|
||||
Ld(OpLd),
|
||||
Ldc(OpLdc),
|
||||
LdSharedLock(OpLdSharedLock),
|
||||
St(OpSt),
|
||||
StSCheckUnlock(OpStSCheckUnlock),
|
||||
Atom(OpAtom),
|
||||
AL2P(OpAL2P),
|
||||
ALd(OpALd),
|
||||
ASt(OpASt),
|
||||
Ipa(OpIpa),
|
||||
LdTram(OpLdTram),
|
||||
CCtl(OpCCtl),
|
||||
MemBar(OpMemBar),
|
||||
BClear(OpBClear),
|
||||
BMov(OpBMov),
|
||||
Break(OpBreak),
|
||||
BSSy(OpBSSy),
|
||||
BSync(OpBSync),
|
||||
Bra(OpBra),
|
||||
FAdd(Box<OpFAdd>),
|
||||
FFma(Box<OpFFma>),
|
||||
FMnMx(Box<OpFMnMx>),
|
||||
FMul(Box<OpFMul>),
|
||||
Rro(Box<OpRro>),
|
||||
MuFu(Box<OpMuFu>),
|
||||
FSet(Box<OpFSet>),
|
||||
FSetP(Box<OpFSetP>),
|
||||
FSwzAdd(Box<OpFSwzAdd>),
|
||||
FSwz(Box<OpFSwz>),
|
||||
DAdd(Box<OpDAdd>),
|
||||
DFma(Box<OpDFma>),
|
||||
DMnMx(Box<OpDMnMx>),
|
||||
DMul(Box<OpDMul>),
|
||||
DSetP(Box<OpDSetP>),
|
||||
HAdd2(Box<OpHAdd2>),
|
||||
HFma2(Box<OpHFma2>),
|
||||
HMul2(Box<OpHMul2>),
|
||||
HSet2(Box<OpHSet2>),
|
||||
HSetP2(Box<OpHSetP2>),
|
||||
Imma(Box<OpImma>),
|
||||
Hmma(Box<OpHmma>),
|
||||
Ldsm(Box<OpLdsm>),
|
||||
HMnMx2(Box<OpHMnMx2>),
|
||||
BMsk(Box<OpBMsk>),
|
||||
BRev(Box<OpBRev>),
|
||||
Bfe(Box<OpBfe>),
|
||||
Flo(Box<OpFlo>),
|
||||
IAbs(Box<OpIAbs>),
|
||||
IAdd2(Box<OpIAdd2>),
|
||||
IAdd2X(Box<OpIAdd2X>),
|
||||
IAdd3(Box<OpIAdd3>),
|
||||
IAdd3X(Box<OpIAdd3X>),
|
||||
IDp4(Box<OpIDp4>),
|
||||
IMad(Box<OpIMad>),
|
||||
IMad64(Box<OpIMad64>),
|
||||
IMul(Box<OpIMul>),
|
||||
IMnMx(Box<OpIMnMx>),
|
||||
ISetP(Box<OpISetP>),
|
||||
Lea(Box<OpLea>),
|
||||
LeaX(Box<OpLeaX>),
|
||||
Lop2(Box<OpLop2>),
|
||||
Lop3(Box<OpLop3>),
|
||||
PopC(Box<OpPopC>),
|
||||
Shf(Box<OpShf>),
|
||||
Shl(Box<OpShl>),
|
||||
Shr(Box<OpShr>),
|
||||
F2F(Box<OpF2F>),
|
||||
F2FP(Box<OpF2FP>),
|
||||
F2I(Box<OpF2I>),
|
||||
I2F(Box<OpI2F>),
|
||||
I2I(Box<OpI2I>),
|
||||
FRnd(Box<OpFRnd>),
|
||||
Mov(Box<OpMov>),
|
||||
Prmt(Box<OpPrmt>),
|
||||
Sel(Box<OpSel>),
|
||||
Shfl(Box<OpShfl>),
|
||||
PLop3(Box<OpPLop3>),
|
||||
PSetP(Box<OpPSetP>),
|
||||
R2UR(Box<OpR2UR>),
|
||||
Redux(Box<OpRedux>),
|
||||
Tex(Box<OpTex>),
|
||||
Tld(Box<OpTld>),
|
||||
Tld4(Box<OpTld4>),
|
||||
Tmml(Box<OpTmml>),
|
||||
Txd(Box<OpTxd>),
|
||||
Txq(Box<OpTxq>),
|
||||
SuLd(Box<OpSuLd>),
|
||||
SuSt(Box<OpSuSt>),
|
||||
SuAtom(Box<OpSuAtom>),
|
||||
SuClamp(Box<OpSuClamp>),
|
||||
SuBfm(Box<OpSuBfm>),
|
||||
SuEau(Box<OpSuEau>),
|
||||
IMadSp(Box<OpIMadSp>),
|
||||
SuLdGa(Box<OpSuLdGa>),
|
||||
SuStGa(Box<OpSuStGa>),
|
||||
Ld(Box<OpLd>),
|
||||
Ldc(Box<OpLdc>),
|
||||
LdSharedLock(Box<OpLdSharedLock>),
|
||||
St(Box<OpSt>),
|
||||
StSCheckUnlock(Box<OpStSCheckUnlock>),
|
||||
Atom(Box<OpAtom>),
|
||||
AL2P(Box<OpAL2P>),
|
||||
ALd(Box<OpALd>),
|
||||
ASt(Box<OpASt>),
|
||||
Ipa(Box<OpIpa>),
|
||||
LdTram(Box<OpLdTram>),
|
||||
CCtl(Box<OpCCtl>),
|
||||
MemBar(Box<OpMemBar>),
|
||||
BClear(Box<OpBClear>),
|
||||
BMov(Box<OpBMov>),
|
||||
Break(Box<OpBreak>),
|
||||
BSSy(Box<OpBSSy>),
|
||||
BSync(Box<OpBSync>),
|
||||
Bra(Box<OpBra>),
|
||||
SSy(OpSSy),
|
||||
Sync(OpSync),
|
||||
Brk(OpBrk),
|
||||
|
|
@ -7941,34 +7941,39 @@ pub enum Op {
|
|||
Cont(OpCont),
|
||||
PCnt(OpPCnt),
|
||||
Exit(OpExit),
|
||||
WarpSync(OpWarpSync),
|
||||
Bar(OpBar),
|
||||
TexDepBar(OpTexDepBar),
|
||||
CS2R(OpCS2R),
|
||||
Isberd(OpIsberd),
|
||||
ViLd(OpViLd),
|
||||
Kill(OpKill),
|
||||
WarpSync(Box<OpWarpSync>),
|
||||
Bar(Box<OpBar>),
|
||||
TexDepBar(Box<OpTexDepBar>),
|
||||
CS2R(Box<OpCS2R>),
|
||||
Isberd(Box<OpIsberd>),
|
||||
ViLd(Box<OpViLd>),
|
||||
Kill(Box<OpKill>),
|
||||
Nop(OpNop),
|
||||
PixLd(OpPixLd),
|
||||
S2R(OpS2R),
|
||||
Vote(OpVote),
|
||||
Match(OpMatch),
|
||||
Undef(OpUndef),
|
||||
SrcBar(OpSrcBar),
|
||||
PhiSrcs(OpPhiSrcs),
|
||||
PhiDsts(OpPhiDsts),
|
||||
Copy(OpCopy),
|
||||
Pin(OpPin),
|
||||
Unpin(OpUnpin),
|
||||
Swap(OpSwap),
|
||||
ParCopy(OpParCopy),
|
||||
RegOut(OpRegOut),
|
||||
Out(OpOut),
|
||||
OutFinal(OpOutFinal),
|
||||
Annotate(OpAnnotate),
|
||||
PixLd(Box<OpPixLd>),
|
||||
S2R(Box<OpS2R>),
|
||||
Vote(Box<OpVote>),
|
||||
Match(Box<OpMatch>),
|
||||
Undef(Box<OpUndef>),
|
||||
SrcBar(Box<OpSrcBar>),
|
||||
PhiSrcs(Box<OpPhiSrcs>),
|
||||
PhiDsts(Box<OpPhiDsts>),
|
||||
Copy(Box<OpCopy>),
|
||||
Pin(Box<OpPin>),
|
||||
Unpin(Box<OpUnpin>),
|
||||
Swap(Box<OpSwap>),
|
||||
ParCopy(Box<OpParCopy>),
|
||||
RegOut(Box<OpRegOut>),
|
||||
Out(Box<OpOut>),
|
||||
OutFinal(Box<OpOutFinal>),
|
||||
Annotate(Box<OpAnnotate>),
|
||||
}
|
||||
impl_display_for_op!(Op);
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
const _: () = {
|
||||
debug_assert!(size_of::<Op>() == 16);
|
||||
};
|
||||
|
||||
impl Op {
|
||||
pub fn is_branch(&self) -> bool {
|
||||
match self {
|
||||
|
|
@ -8620,7 +8625,7 @@ impl BasicBlock {
|
|||
|
||||
pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
|
||||
self.phi_dsts_ip().map(|ip| match &self.instrs[ip].op {
|
||||
Op::PhiDsts(phi) => phi,
|
||||
Op::PhiDsts(phi) => phi.deref(),
|
||||
_ => panic!("Expected to find the phi"),
|
||||
})
|
||||
}
|
||||
|
|
@ -8628,7 +8633,7 @@ impl BasicBlock {
|
|||
#[allow(dead_code)]
|
||||
pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
|
||||
self.phi_dsts_ip().map(|ip| match &mut self.instrs[ip].op {
|
||||
Op::PhiDsts(phi) => phi,
|
||||
Op::PhiDsts(phi) => phi.deref_mut(),
|
||||
_ => panic!("Expected to find the phi"),
|
||||
})
|
||||
}
|
||||
|
|
@ -8646,14 +8651,14 @@ impl BasicBlock {
|
|||
}
|
||||
pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
|
||||
self.phi_srcs_ip().map(|ip| match &self.instrs[ip].op {
|
||||
Op::PhiSrcs(phi) => phi,
|
||||
Op::PhiSrcs(phi) => phi.deref(),
|
||||
_ => panic!("Expected to find the phi"),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
|
||||
self.phi_srcs_ip().map(|ip| match &mut self.instrs[ip].op {
|
||||
Op::PhiSrcs(phi) => phi,
|
||||
Op::PhiSrcs(phi) => phi.deref_mut(),
|
||||
_ => panic!("Expected to find the phi"),
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -459,20 +459,17 @@ fn legalize_instr(
|
|||
}
|
||||
|
||||
// OpBreak and OpBSSy impose additional RA constraints
|
||||
match &mut instr.op {
|
||||
Op::Break(OpBreak {
|
||||
bar_in, bar_out, ..
|
||||
})
|
||||
| Op::BSSy(OpBSSy {
|
||||
bar_in, bar_out, ..
|
||||
}) => {
|
||||
let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap();
|
||||
if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) {
|
||||
let gpr = b.bmov_to_gpr(bar_in.clone());
|
||||
let tmp = b.bmov_to_bar(gpr.into());
|
||||
*bar_in = tmp.into();
|
||||
}
|
||||
let mut legalize_break_bssy = |bar_in: &mut Src, bar_out: &mut Dst| {
|
||||
let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap();
|
||||
if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) {
|
||||
let gpr = b.bmov_to_gpr(bar_in.clone());
|
||||
let tmp = b.bmov_to_bar(gpr.into());
|
||||
*bar_in = tmp.into();
|
||||
}
|
||||
};
|
||||
match &mut instr.op {
|
||||
Op::Break(op) => legalize_break_bssy(&mut op.bar_in, &mut op.bar_out),
|
||||
Op::BSSy(op) => legalize_break_bssy(&mut op.bar_in, &mut op.bar_out),
|
||||
_ => (),
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -260,7 +260,7 @@ impl LowerCopySwap {
|
|||
.into(),
|
||||
}));
|
||||
}
|
||||
self.lower_r2ur(&mut b, r2ur);
|
||||
self.lower_r2ur(&mut b, *r2ur);
|
||||
b.into_mapped_instrs()
|
||||
}
|
||||
Op::Copy(copy) => {
|
||||
|
|
@ -272,7 +272,7 @@ impl LowerCopySwap {
|
|||
.into(),
|
||||
}));
|
||||
}
|
||||
self.lower_copy(&mut b, copy);
|
||||
self.lower_copy(&mut b, *copy);
|
||||
b.into_mapped_instrs()
|
||||
}
|
||||
Op::Swap(swap) => {
|
||||
|
|
@ -284,7 +284,7 @@ impl LowerCopySwap {
|
|||
.into(),
|
||||
}));
|
||||
}
|
||||
self.lower_swap(&mut b, swap);
|
||||
self.lower_swap(&mut b, *swap);
|
||||
b.into_mapped_instrs()
|
||||
}
|
||||
_ => MappedInstrs::One(instr),
|
||||
|
|
|
|||
|
|
@ -265,7 +265,7 @@ impl Shader<'_> {
|
|||
.into(),
|
||||
}));
|
||||
}
|
||||
match lower_par_copy(pc, sm) {
|
||||
match lower_par_copy(*pc, sm) {
|
||||
MappedInstrs::None => {
|
||||
if let Some(instr) = instrs.pop() {
|
||||
MappedInstrs::One(instr)
|
||||
|
|
|
|||
|
|
@ -87,10 +87,13 @@ fn jump_thread(func: &mut Function) -> bool {
|
|||
.get(&target_label)
|
||||
.map(clone_branch)
|
||||
.unwrap_or_else(|| {
|
||||
Op::Bra(OpBra {
|
||||
target: target_label,
|
||||
cond: true.into(),
|
||||
})
|
||||
Op::Bra(
|
||||
OpBra {
|
||||
target: target_label,
|
||||
cond: true.into(),
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
});
|
||||
replacements.insert(block_label, replacement);
|
||||
}
|
||||
|
|
@ -139,10 +142,8 @@ fn rewrite_cfg(func: &mut Function) {
|
|||
fn opt_fall_through(func: &mut Function) {
|
||||
for i in 0..func.blocks.len() - 1 {
|
||||
let remove_last_instr = match func.blocks[i].branch() {
|
||||
Some(b) => match b.op {
|
||||
Op::Bra(OpBra { target, .. }) => {
|
||||
target == func.blocks[i + 1].label
|
||||
}
|
||||
Some(b) => match &b.op {
|
||||
Op::Bra(bra) => bra.target == func.blocks[i + 1].label,
|
||||
_ => false,
|
||||
},
|
||||
None => false,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue