mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 15:10:10 +01:00
nak: Add a new OpFSwz and use it for derivatives on Kepler
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34540>
This commit is contained in:
parent
309c48cbb7
commit
ddbf2ec883
3 changed files with 155 additions and 45 deletions
|
|
@ -2293,31 +2293,48 @@ impl<'a> ShaderFromNir<'a> {
|
|||
|
||||
assert!(intrin.def.bit_size() == 32);
|
||||
let ftype = FloatType::F32;
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: self.get_src(&srcs[0]),
|
||||
lane: 1_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
if self.sm.sm() >= 50 {
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: self.get_src(&srcs[0]),
|
||||
lane: 1_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
} else {
|
||||
b.push_op(OpFSwz {
|
||||
dst: dst[0].into(),
|
||||
srcs: [self.get_src(&srcs[0]), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
shuffle: FSwzShuffle::SwapHorizontal,
|
||||
});
|
||||
}
|
||||
|
||||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
|
|
@ -2328,31 +2345,47 @@ impl<'a> ShaderFromNir<'a> {
|
|||
|
||||
assert!(intrin.def.bit_size() == 32);
|
||||
let ftype = FloatType::F32;
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: self.get_src(&srcs[0]),
|
||||
lane: 2_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
if self.sm.sm() >= 50 {
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: self.get_src(&srcs[0]),
|
||||
lane: 2_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
} else {
|
||||
b.push_op(OpFSwz {
|
||||
dst: dst[0].into(),
|
||||
srcs: [self.get_src(&srcs[0]), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
shuffle: FSwzShuffle::SwapVertical,
|
||||
});
|
||||
}
|
||||
|
||||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2869,6 +2869,80 @@ impl DisplayOp for OpFSwzAdd {
|
|||
}
|
||||
impl_display_for_op!(OpFSwzAdd);
|
||||
|
||||
/// Describes where the second source of [`OpFSwz`] is taken from before the
/// per-lane ops are applied.
///
/// The `Display` form of each variant is a dot followed by four digits, one
/// per quad lane in order; presumably each digit names the quad lane whose
/// value that lane reads (e.g. `.1032`: lane 0 reads lane 1, lane 1 reads
/// lane 0, lane 2 reads lane 3, lane 3 reads lane 2).
// NOTE(review): only the two swap variants are constructed by the derivative
// lowering; the `Quad*` variants appear to exist for completeness, which is
// presumably why `dead_code` is allowed here -- confirm before removing.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FSwzShuffle {
    /// Printed as `.0000`; presumably every lane reads from quad lane 0.
    Quad0,
    /// Printed as `.1111`; presumably every lane reads from quad lane 1.
    Quad1,
    /// Printed as `.2222`; presumably every lane reads from quad lane 2.
    Quad2,
    /// Printed as `.3333`; presumably every lane reads from quad lane 3.
    Quad3,
    /// Swap lanes [0, 1] and [2, 3] (printed as `.1032`).
    SwapHorizontal,
    /// Swap lanes [0, 2] and [1, 3] (printed as `.2301`).
    SwapVertical,
}
|
||||
|
||||
impl fmt::Display for FSwzShuffle {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
FSwzShuffle::Quad0 => write!(f, ".0000"),
|
||||
FSwzShuffle::Quad1 => write!(f, ".1111"),
|
||||
FSwzShuffle::Quad2 => write!(f, ".2222"),
|
||||
FSwzShuffle::Quad3 => write!(f, ".3333"),
|
||||
FSwzShuffle::SwapHorizontal => write!(f, ".1032"),
|
||||
FSwzShuffle::SwapVertical => write!(f, ".2301"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Op only present in Kepler and older
|
||||
/// It first does a shuffle on the second src and then applies
|
||||
/// src0 op src1, each thread on a quad might do a different operation.
|
||||
///
|
||||
/// This is used to encode ddx/ddy
|
||||
/// ex: ddx
|
||||
/// src1 = shuffle swap horizontal src1
|
||||
/// ops = [sub, subr, sub, subr]
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpFSwz {
|
||||
#[dst_type(F32)]
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(GPR)]
|
||||
pub srcs: [Src; 2],
|
||||
|
||||
pub rnd_mode: FRndMode,
|
||||
pub ftz: bool,
|
||||
pub shuffle: FSwzShuffle,
|
||||
|
||||
pub ops: [FSwzAddOp; 4],
|
||||
}
|
||||
|
||||
impl DisplayOp for OpFSwz {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "fswz{}", self.shuffle)?;
|
||||
if self.rnd_mode != FRndMode::NearestEven {
|
||||
write!(f, "{}", self.rnd_mode)?;
|
||||
}
|
||||
if self.ftz {
|
||||
write!(f, ".ftz")?;
|
||||
}
|
||||
write!(
|
||||
f,
|
||||
" {} {} [{}, {}, {}, {}]",
|
||||
self.srcs[0],
|
||||
self.srcs[1],
|
||||
self.ops[0],
|
||||
self.ops[1],
|
||||
self.ops[2],
|
||||
self.ops[3],
|
||||
)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpFSwz);
|
||||
|
||||
pub enum RroOp {
|
||||
SinCos,
|
||||
Exp2,
|
||||
|
|
@ -6472,6 +6546,7 @@ pub enum Op {
|
|||
FSet(OpFSet),
|
||||
FSetP(OpFSetP),
|
||||
FSwzAdd(OpFSwzAdd),
|
||||
FSwz(OpFSwz),
|
||||
DAdd(OpDAdd),
|
||||
DFma(OpDFma),
|
||||
DMnMx(OpDMnMx),
|
||||
|
|
@ -6621,6 +6696,7 @@ impl Op {
|
|||
| Op::HSet2(_)
|
||||
| Op::HSetP2(_)
|
||||
| Op::HMnMx2(_)
|
||||
| Op::FSwz(_)
|
||||
| Op::FSwzAdd(_) => true,
|
||||
|
||||
// Multi-function unit is variable latency
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ pub fn side_effect_type(op: &Op) -> SideEffect {
|
|||
| Op::HSet2(_)
|
||||
| Op::HSetP2(_)
|
||||
| Op::HMnMx2(_)
|
||||
| Op::FSwz(_)
|
||||
| Op::FSwzAdd(_) => SideEffect::None,
|
||||
|
||||
// Multi-function unit
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue