mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 17:20:10 +01:00
nak: Add support for fddx and fddy
This uses SHFL in combination with FSWZADD. Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
parent
e174fc9ab3
commit
04911df940
4 changed files with 288 additions and 1 deletions
|
|
@ -515,6 +515,33 @@ impl SM75Instr {
|
|||
self.set_pred_src(87..90, 90, op.accum);
|
||||
}
|
||||
|
||||
fn encode_fswzadd(&mut self, op: &OpFSwzAdd) {
|
||||
self.set_opcode(0x822);
|
||||
self.set_dst(op.dst);
|
||||
|
||||
self.set_reg_src(24..32, op.srcs[0]);
|
||||
self.set_reg_src(64..72, op.srcs[1]);
|
||||
|
||||
let mut subop = 0x0_u8;
|
||||
|
||||
for (i, swz_op) in op.ops.iter().enumerate() {
|
||||
let swz_op = match swz_op {
|
||||
FSwzAddOp::Add => 0,
|
||||
FSwzAddOp::SubRight => 2,
|
||||
FSwzAddOp::SubLeft => 1,
|
||||
FSwzAddOp::MoveLeft => 3,
|
||||
};
|
||||
|
||||
subop |= swz_op << ((op.ops.len() - i - 1) * 2);
|
||||
}
|
||||
|
||||
self.set_field(32..40, subop);
|
||||
|
||||
self.set_bit(77, false); /* NDV */
|
||||
self.set_rnd_mode(78..80, op.rnd_mode);
|
||||
self.set_bit(80, false); /* TODO: FTZ */
|
||||
}
|
||||
|
||||
fn encode_mufu(&mut self, op: &OpMuFu) {
|
||||
self.encode_alu(
|
||||
0x108,
|
||||
|
|
@ -854,6 +881,54 @@ impl SM75Instr {
|
|||
self.set_pred_src(87..90, 90, op.cond);
|
||||
}
|
||||
|
||||
fn encode_shfl(&mut self, op: &OpShfl) {
|
||||
assert!(op.lane.src_mod.is_none());
|
||||
assert!(op.c.src_mod.is_none());
|
||||
|
||||
match &op.lane.src_ref {
|
||||
SrcRef::Reg(_) => match &op.c.src_ref {
|
||||
SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x389);
|
||||
self.set_reg_src(32..40, op.lane);
|
||||
self.set_reg_src(64..72, op.c);
|
||||
}
|
||||
SrcRef::Imm32(imm_c) => {
|
||||
self.set_opcode(0x589);
|
||||
self.set_reg_src(32..40, op.lane);
|
||||
self.set_field(40..53, *imm_c);
|
||||
}
|
||||
_ => panic!("Invalid instruction form"),
|
||||
},
|
||||
SrcRef::Imm32(imm_lane) => match &op.c.src_ref {
|
||||
SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x989);
|
||||
self.set_field(53..58, *imm_lane);
|
||||
self.set_reg_src(64..72, op.c);
|
||||
}
|
||||
SrcRef::Imm32(imm_c) => {
|
||||
self.set_opcode(0xf89);
|
||||
self.set_field(40..53, *imm_c);
|
||||
self.set_field(53..58, *imm_lane);
|
||||
}
|
||||
_ => panic!("Invalid instruction form"),
|
||||
},
|
||||
_ => panic!("Invalid instruction form"),
|
||||
};
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_pred_dst(81..84, Dst::None);
|
||||
self.set_reg_src(24..32, op.src);
|
||||
self.set_field(
|
||||
58..60,
|
||||
match op.op {
|
||||
ShflOp::Idx => 0_u8,
|
||||
ShflOp::Up => 1_u8,
|
||||
ShflOp::Down => 2_u8,
|
||||
ShflOp::Bfly => 3_u8,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
fn encode_plop3(&mut self, op: &OpPLop3) {
|
||||
self.set_opcode(0x81c);
|
||||
self.set_field(16..24, op.ops[1].lut);
|
||||
|
|
@ -1506,6 +1581,17 @@ impl SM75Instr {
|
|||
self.set_field(90..91, false); /* NOT */
|
||||
}
|
||||
|
||||
fn encode_warpsync(&mut self, op: &OpWarpSync) {
|
||||
self.encode_alu(
|
||||
0x148,
|
||||
None,
|
||||
ALUSrc::None,
|
||||
ALUSrc::Imm32(op.mask),
|
||||
ALUSrc::None,
|
||||
);
|
||||
self.set_pred_src(87..90, 90, SrcRef::True.into());
|
||||
}
|
||||
|
||||
fn encode_bar(&mut self, _op: &OpBar) {
|
||||
self.set_opcode(0x31d);
|
||||
|
||||
|
|
@ -1583,6 +1669,7 @@ impl SM75Instr {
|
|||
Op::FMul(op) => si.encode_fmul(&op),
|
||||
Op::FSet(op) => si.encode_fset(&op),
|
||||
Op::FSetP(op) => si.encode_fsetp(&op),
|
||||
Op::FSwzAdd(op) => si.encode_fswzadd(&op),
|
||||
Op::MuFu(op) => si.encode_mufu(&op),
|
||||
Op::Brev(op) => si.encode_brev(&op),
|
||||
Op::Flo(op) => si.encode_flo(&op),
|
||||
|
|
@ -1603,6 +1690,7 @@ impl SM75Instr {
|
|||
Op::Mov(op) => si.encode_mov(&op),
|
||||
Op::Prmt(op) => si.encode_prmt(&op),
|
||||
Op::Sel(op) => si.encode_sel(&op),
|
||||
Op::Shfl(op) => si.encode_shfl(&op),
|
||||
Op::PLop3(op) => si.encode_plop3(&op),
|
||||
Op::Tex(op) => si.encode_tex(&op),
|
||||
Op::Tld(op) => si.encode_tld(&op),
|
||||
|
|
@ -1624,6 +1712,7 @@ impl SM75Instr {
|
|||
Op::MemBar(op) => si.encode_membar(&op),
|
||||
Op::Bra(op) => si.encode_bra(&op, ip, block_offsets),
|
||||
Op::Exit(op) => si.encode_exit(&op),
|
||||
Op::WarpSync(op) => si.encode_warpsync(&op),
|
||||
Op::Bar(op) => si.encode_bar(&op),
|
||||
Op::CS2R(op) => si.encode_cs2r(&op),
|
||||
Op::Kill(op) => si.encode_kill(&op),
|
||||
|
|
|
|||
|
|
@ -804,6 +804,68 @@ impl<'a> ShaderFromNir<'a> {
|
|||
});
|
||||
dst
|
||||
}
|
||||
nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
|
||||
// TODO: Real coarse derivatives
|
||||
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpWarpSync { mask: u32::MAX });
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
src: srcs[0],
|
||||
lane: Src::new_imm_u32(1),
|
||||
c: Src::new_imm_u32(0x3 | 0x1c << 8),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), srcs[0]],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: FRndMode::NearestEven,
|
||||
});
|
||||
|
||||
dst
|
||||
}
|
||||
nir_op_fddy | nir_op_fddy_coarse | nir_op_fddy_fine => {
|
||||
// TODO: Real coarse derivatives
|
||||
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpWarpSync { mask: u32::MAX });
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
src: srcs[0],
|
||||
lane: Src::new_imm_u32(2),
|
||||
c: Src::new_imm_u32(0x3 | 0x1c << 8),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), srcs[0]],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: FRndMode::NearestEven,
|
||||
});
|
||||
|
||||
dst
|
||||
}
|
||||
_ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
|
||||
};
|
||||
self.set_dst(&alu.def, dst);
|
||||
|
|
|
|||
|
|
@ -1988,6 +1988,59 @@ impl fmt::Display for OpFSetP {
|
|||
}
|
||||
}
|
||||
|
||||
/// Per-entry operation selector carried by `OpFSwzAdd::ops`.
///
/// The `Display` impl prints the mnemonics `ADD`, `SUBR`, `SUB` and `MOV2`
/// for `Add`, `SubRight`, `SubLeft` and `MoveLeft` respectively.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum FSwzAddOp {
    Add,
    SubRight,
    SubLeft,
    MoveLeft,
}

impl fmt::Display for FSwzAddOp {
    /// Writes the mnemonic of this operation; width and padding flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match self {
            FSwzAddOp::Add => "ADD",
            FSwzAddOp::SubRight => "SUBR",
            FSwzAddOp::SubLeft => "SUB",
            FSwzAddOp::MoveLeft => "MOV2",
        };
        f.write_str(mnemonic)
    }
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpFSwzAdd {
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(GPR)]
|
||||
pub srcs: [Src; 2],
|
||||
|
||||
pub rnd_mode: FRndMode,
|
||||
|
||||
pub ops: [FSwzAddOp; 4],
|
||||
}
|
||||
|
||||
impl fmt::Display for OpFSwzAdd {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "FSWZADD",)?;
|
||||
if self.rnd_mode != FRndMode::NearestEven {
|
||||
write!(f, ".{}", self.rnd_mode)?;
|
||||
}
|
||||
write!(
|
||||
f,
|
||||
" {} {{ {}, {} }} [{}, {}, {}, {}]",
|
||||
self.dst,
|
||||
self.srcs[0],
|
||||
self.srcs[1],
|
||||
self.ops[0],
|
||||
self.ops[1],
|
||||
self.ops[2],
|
||||
self.ops[3],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
pub enum MuFuOp {
|
||||
|
|
@ -2299,6 +2352,26 @@ impl fmt::Display for OpLop3 {
|
|||
}
|
||||
}
|
||||
|
||||
/// Variant selector carried by `OpShfl::op`.
///
/// The `Display` impl prints the mnemonics `IDX`, `UP`, `DOWN` and `BFLY`
/// for the variants in declaration order.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ShflOp {
    Idx,
    Up,
    Down,
    Bfly,
}

impl fmt::Display for ShflOp {
    /// Writes the mnemonic of this variant; width and padding flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match self {
            ShflOp::Idx => "IDX",
            ShflOp::Up => "UP",
            ShflOp::Down => "DOWN",
            ShflOp::Bfly => "BFLY",
        };
        f.write_str(mnemonic)
    }
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpShf {
|
||||
|
|
@ -2619,6 +2692,33 @@ impl fmt::Display for OpSel {
|
|||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpShfl {
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(SSA)]
|
||||
pub src: Src,
|
||||
|
||||
#[src_type(ALU)]
|
||||
pub lane: Src,
|
||||
|
||||
#[src_type(ALU)]
|
||||
pub c: Src,
|
||||
|
||||
pub op: ShflOp,
|
||||
}
|
||||
|
||||
impl fmt::Display for OpShfl {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"SHFL.{} {} {{ {}, {}, {} }}",
|
||||
self.op, self.dst, self.src, self.lane, self.c
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpPLop3 {
|
||||
|
|
@ -3254,6 +3354,18 @@ impl fmt::Display for OpExit {
|
|||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpWarpSync {
|
||||
pub mask: u32,
|
||||
}
|
||||
|
||||
impl fmt::Display for OpWarpSync {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "WARPSYNC 0x{:x}", self.mask)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpBar {}
|
||||
|
|
@ -3651,6 +3763,7 @@ pub enum Op {
|
|||
MuFu(OpMuFu),
|
||||
FSet(OpFSet),
|
||||
FSetP(OpFSetP),
|
||||
FSwzAdd(OpFSwzAdd),
|
||||
DAdd(OpDAdd),
|
||||
Brev(OpBrev),
|
||||
Flo(OpFlo),
|
||||
|
|
@ -3672,6 +3785,7 @@ pub enum Op {
|
|||
Mov(OpMov),
|
||||
Prmt(OpPrmt),
|
||||
Sel(OpSel),
|
||||
Shfl(OpShfl),
|
||||
PLop3(OpPLop3),
|
||||
Tex(OpTex),
|
||||
Tld(OpTld),
|
||||
|
|
@ -3693,6 +3807,7 @@ pub enum Op {
|
|||
MemBar(OpMemBar),
|
||||
Bra(OpBra),
|
||||
Exit(OpExit),
|
||||
WarpSync(OpWarpSync),
|
||||
Bar(OpBar),
|
||||
CS2R(OpCS2R),
|
||||
Kill(OpKill),
|
||||
|
|
@ -4016,6 +4131,7 @@ impl Instr {
|
|||
| Op::Kill(_)
|
||||
| Op::Bra(_)
|
||||
| Op::Exit(_)
|
||||
| Op::WarpSync(_)
|
||||
| Op::Bar(_)
|
||||
| Op::FSOut(_) => false,
|
||||
_ => true,
|
||||
|
|
@ -4030,7 +4146,8 @@ impl Instr {
|
|||
| Op::FMnMx(_)
|
||||
| Op::FMul(_)
|
||||
| Op::FSet(_)
|
||||
| Op::FSetP(_) => true,
|
||||
| Op::FSetP(_)
|
||||
| Op::FSwzAdd(_) => true,
|
||||
|
||||
// Multi-function unit is variable latency
|
||||
Op::MuFu(_) => false,
|
||||
|
|
@ -4056,6 +4173,7 @@ impl Instr {
|
|||
|
||||
// Move ops
|
||||
Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
|
||||
Op::Shfl(_) => false,
|
||||
|
||||
// Predicate ops
|
||||
Op::PLop3(_) => true,
|
||||
|
|
@ -4084,6 +4202,7 @@ impl Instr {
|
|||
|
||||
// Control-flow ops
|
||||
Op::Bra(_) | Op::Exit(_) => true,
|
||||
Op::WarpSync(_) => false,
|
||||
|
||||
// Miscellaneous ops
|
||||
Op::Bar(_)
|
||||
|
|
|
|||
|
|
@ -46,6 +46,13 @@ fn copy_src(b: &mut impl SSABuilder, src: &mut Src, file: RegFile) {
|
|||
src.src_ref = val.into();
|
||||
}
|
||||
|
||||
fn copy_src_if_cbuf(b: &mut impl SSABuilder, src: &mut Src, file: RegFile) {
|
||||
match src.src_ref {
|
||||
SrcRef::CBuf(_) => copy_src(b, src, file),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
fn copy_src_if_not_reg(b: &mut impl SSABuilder, src: &mut Src, file: RegFile) {
|
||||
if !src_is_reg(&src) {
|
||||
copy_src(b, src, file);
|
||||
|
|
@ -232,6 +239,16 @@ fn legalize_instr(b: &mut impl SSABuilder, instr: &mut Instr) {
|
|||
copy_src_if_not_reg(b, src0, RegFile::GPR);
|
||||
copy_src_if_not_reg(b, src2, RegFile::GPR);
|
||||
}
|
||||
Op::FSwzAdd(op) => {
|
||||
let [ref mut src0, ref mut src1] = op.srcs;
|
||||
copy_src_if_not_reg(b, src0, RegFile::GPR);
|
||||
copy_src_if_not_reg(b, src1, RegFile::GPR);
|
||||
}
|
||||
Op::Shfl(op) => {
|
||||
copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
|
||||
copy_src_if_cbuf(b, &mut op.lane, RegFile::GPR);
|
||||
copy_src_if_cbuf(b, &mut op.c, RegFile::GPR);
|
||||
}
|
||||
Op::Ldc(_) => (), // Nothing to do
|
||||
Op::Copy(_) => (), // Nothing to do
|
||||
_ => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue