nak: Use OpLop2 and OpPSetP pre-SM70

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26114>
This commit is contained in:
Faith Ekstrand 2023-11-09 10:51:46 -06:00 committed by Marge Bot
parent 36e80caac9
commit e404877a02
5 changed files with 220 additions and 60 deletions

View file

@ -24,10 +24,7 @@ pub trait Builder {
}
}
fn lop2_to(&mut self, dst: Dst, op: LogicOp3, x: Src, y: Src) {
/* Only uses x and y */
assert!(!op.src_used(2));
fn lop2_to(&mut self, dst: Dst, op: LogicOp2, mut x: Src, mut y: Src) {
let is_predicate = match dst {
Dst::None => panic!("No LOP destination"),
Dst::SSA(ssa) => ssa.is_predicate(),
@ -36,18 +33,54 @@ pub trait Builder {
assert!(x.is_predicate() == is_predicate);
assert!(y.is_predicate() == is_predicate);
if is_predicate {
self.push_op(OpPLop3 {
dsts: [dst.into(), Dst::None],
srcs: [x, y, Src::new_imm_bool(true)],
ops: [op, LogicOp3::new_const(false)],
});
if self.sm() >= 70 {
let mut op = op.to_lut();
if x.src_mod.is_bnot() {
op = LogicOp3::new_lut(&|x, y, _| op.eval(!x, y, 0));
x.src_mod = SrcMod::None;
}
if y.src_mod.is_bnot() {
op = LogicOp3::new_lut(&|x, y, _| op.eval(x, !y, 0));
y.src_mod = SrcMod::None;
}
if is_predicate {
self.push_op(OpPLop3 {
dsts: [dst.into(), Dst::None],
srcs: [x, y, true.into()],
ops: [op, LogicOp3::new_const(false)],
});
} else {
self.push_op(OpLop3 {
dst: dst.into(),
srcs: [x, y, 0.into()],
op: op,
});
}
} else {
self.push_op(OpLop3 {
dst: dst.into(),
srcs: [x, y, Src::new_zero()],
op: op,
});
if is_predicate {
let mut x = x;
let cmp_op = match op {
LogicOp2::And => PredSetOp::And,
LogicOp2::Or => PredSetOp::Or,
LogicOp2::Xor => PredSetOp::Xor,
LogicOp2::PassB => {
// Pass through B by AND with PT
x = true.into();
PredSetOp::And
}
};
self.push_op(OpPSetP {
dsts: [dst.into(), Dst::None],
ops: [cmp_op, PredSetOp::And],
srcs: [x, y, true.into()],
});
} else {
self.push_op(OpLop2 {
dst: dst.into(),
srcs: [x, y],
op: op,
});
}
}
}
@ -259,7 +292,7 @@ pub trait SSABuilder: Builder {
dst
}
fn lop2(&mut self, op: LogicOp3, x: Src, y: Src) -> SSARef {
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
let dst = if x.is_predicate() {
self.alloc_ssa(RegFile::Pred, 1)
} else {

View file

@ -315,15 +315,19 @@ impl SM50Instr {
self.set_field(range, reg.base_idx());
}
fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
assert!(src.src_mod.is_none());
match src.src_ref {
/// Writes the register named by `src_ref` into the field at `range`.
///
/// `SrcRef::Zero` is written as the GPR zero register.
///
/// # Panics
///
/// Panics if `src_ref` is neither `SrcRef::Zero` nor `SrcRef::Reg`.
fn set_reg_src_ref(&mut self, range: Range<usize>, src_ref: SrcRef) {
    // Resolve the reference to a concrete register first so there is a
    // single place where the field actually gets written.
    let resolved = match src_ref {
        SrcRef::Reg(reg) => reg,
        SrcRef::Zero => RegRef::zero(RegFile::GPR, 1),
        _ => panic!("Not a register"),
    };
    self.set_reg(range, resolved);
}
fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
assert!(src.src_mod.is_none());
self.set_reg_src_ref(range, src.src_ref);
}
fn set_pred_dst(&mut self, range: Range<usize>, dst: Dst) {
match dst {
Dst::None => {
@ -631,6 +635,20 @@ impl SM50Instr {
self.set_pred_src(39..42, 42, op.cond);
}
/// Encodes an `OpPSetP` (opcode 0x5090): two predicate destinations, three
/// predicate sources and two predicate set ops.
fn encode_psetp(&mut self, op: &OpPSetP) {
    self.set_opcode(0x5090);

    // dsts[0] occupies bits 3..6; the second destination (dst1) is in 0..3.
    for (bits, dst) in [(3..6, op.dsts[0]), (0..3, op.dsts[1])] {
        self.set_pred_dst(bits, dst);
    }

    // Each predicate source is a 3-bit field followed by one extra bit that
    // is handed to `set_pred_src` alongside the field range.
    for (bits, extra_bit, src) in [
        (12..15, 15, op.srcs[0]),
        (29..32, 32, op.srcs[1]),
        (39..42, 42, op.srcs[2]),
    ] {
        self.set_pred_src(bits, extra_bit, src);
    }

    for (bits, set_op) in [(24..26, op.ops[0]), (45..47, op.ops[1])] {
        self.set_pred_set_op(bits, set_op);
    }
}
fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) {
assert!(range.len() == 3);
self.set_field(
@ -717,6 +735,63 @@ impl SM50Instr {
}
}
/// Encodes an `OpLop2` (two-source logic op).
///
/// Two encodings are produced:
///
/// * If `op.srcs[1].as_imm_not_i20()` yields a value, the 32-bit immediate
///   form (LOP32I, opcode 0x0400) is emitted. Presumably that helper returns
///   an immediate that does not fit the 20-bit form; confirm against its
///   definition.
/// * Otherwise the opcode is chosen from the kind of source 1: register or
///   zero (0x5c40), immediate (0x3840) or constant buffer (0x4c40).
///
/// In both forms source 0 must be a register (or zero); its bitwise-not
/// modifier is written as a single bit.
///
/// # Panics
///
/// Panics if `op.op` is `LogicOp2::PassB` in the LOP32I form, if source 1 is
/// not a register, zero, immediate or constant buffer reference, or if
/// source 0 is not a register (via `set_reg_src_ref`).
fn encode_lop2(&mut self, op: &OpLop2) {
    if let Some(imm32) = op.srcs[1].as_imm_not_i20() {
        // LOP32I: logic op with a full 32-bit immediate as source 1.
        self.set_opcode(0x0400);

        self.set_dst(op.dst);
        self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
        self.set_bit(55, op.srcs[0].src_mod.is_bnot());
        self.set_src_imm32(20..52, imm32);

        self.set_field(
            53..55,
            match op.op {
                LogicOp2::And => 0_u8,
                LogicOp2::Or => 1_u8,
                LogicOp2::Xor => 2_u8,
                LogicOp2::PassB => {
                    panic!("PASS_B is not supported for LOP32I");
                }
            },
        );
    } else {
        // Regular form: the opcode depends on where source 1 comes from.
        match &op.srcs[1].src_ref {
            SrcRef::Zero | SrcRef::Reg(_) => {
                self.set_opcode(0x5c40);
                self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
            }
            SrcRef::Imm32(i) => {
                self.set_opcode(0x3840);
                self.set_src_imm_i20(20..39, 56, *i);
            }
            SrcRef::CBuf(cb) => {
                self.set_opcode(0x4c40);
                self.set_src_cb(20..39, cb);
            }
            // This is the LOP encoder; the message previously named IMUL.
            src1 => panic!("unsupported src1 type for LOP: {src1}"),
        }

        self.set_dst(op.dst);
        self.set_reg_src_ref(8..16, op.srcs[0].src_ref);

        // One bitwise-not bit per source.
        self.set_bit(39, op.srcs[0].src_mod.is_bnot());
        self.set_bit(40, op.srcs[1].src_mod.is_bnot());

        self.set_field(
            41..43,
            match op.op {
                LogicOp2::And => 0_u8,
                LogicOp2::Or => 1_u8,
                LogicOp2::Xor => 2_u8,
                LogicOp2::PassB => 3_u8,
            },
        );

        // The predicate destination field at 48..51 is written as "none".
        self.set_pred_dst(48..51, Dst::None);
    }
}
fn encode_shf(&mut self, op: &OpShf) {
/* TODO: This should happen as part of a legalization pass */
assert!(op.shift.is_reg_or_zero());
@ -1645,12 +1720,14 @@ impl SM50Instr {
Op::IAdd3(op) => si.encode_iadd3(&op),
Op::Mov(op) => si.encode_mov(&op),
Op::Sel(op) => si.encode_sel(&op),
Op::PSetP(op) => si.encode_psetp(&op),
Op::SuSt(op) => si.encode_sust(&op),
Op::S2R(op) => si.encode_s2r(&op),
Op::PopC(op) => si.encode_popc(&op),
Op::Brev(op) => si.encode_brev(&op),
Op::Prmt(op) => si.encode_prmt(&op),
Op::Ld(op) => si.encode_ld(&op),
Op::Lop2(op) => si.encode_lop2(&op),
Op::Shf(op) => si.encode_shf(&op),
Op::F2F(op) => si.encode_f2f(&op),
Op::F2I(op) => si.encode_f2i(&op),

View file

@ -774,13 +774,10 @@ impl<'a> ShaderFromNir<'a> {
b.iadd(srcs[0], srcs[1])
}
}
nir_op_iand => {
b.lop2(LogicOp3::new_lut(&|x, y, _| x & y), srcs[0], srcs[1])
}
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]),
nir_op_ieq => {
if alu.get_src(0).bit_size() == 1 {
let lop = LogicOp3::new_lut(&|x, y, _| !(x ^ y));
b.lop2(lop, srcs[0], srcs[1])
b.lop2(LogicOp2::Xor, srcs[0], srcs[1].bnot())
} else if alu.get_src(0).bit_size() == 64 {
b.isetp64(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
} else {
@ -850,8 +847,7 @@ impl<'a> ShaderFromNir<'a> {
}
nir_op_ine => {
if alu.get_src(0).bit_size() == 1 {
let lop = LogicOp3::new_lut(&|x, y, _| x ^ y);
b.lop2(lop, srcs[0], srcs[1])
b.lop2(LogicOp2::Xor, srcs[0], srcs[1])
} else if alu.get_src(0).bit_size() == 64 {
b.isetp64(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
} else {
@ -882,28 +878,21 @@ impl<'a> ShaderFromNir<'a> {
}
}
nir_op_inot => {
let lop = LogicOp3::new_lut(&|x, _, _| !x);
if alu.def.bit_size() == 1 {
b.lop2(lop, srcs[0], true.into())
b.lop2(LogicOp2::PassB, true.into(), srcs[0].bnot())
} else {
assert!(alu.def.bit_size() == 32);
b.lop2(lop, srcs[0], 0.into())
b.lop2(LogicOp2::PassB, 0.into(), srcs[0].bnot())
}
}
nir_op_ior => {
b.lop2(LogicOp3::new_lut(&|x, y, _| x | y), srcs[0], srcs[1])
}
nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]),
nir_op_ishl => {
let x = *srcs[0].as_ssa().unwrap();
let shift = srcs[1];
if alu.def.bit_size() == 64 {
// For 64-bit shifts, we have to use clamp mode so we need
// to mask the shift in order satisfy NIR semantics.
let shift = b.lop2(
LogicOp3::new_lut(&|x, y, _| x & y),
shift,
0x3f.into(),
);
let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
let dst = b.alloc_ssa(RegFile::GPR, 2);
b.push_op(OpShf {
dst: dst[0].into(),
@ -948,11 +937,7 @@ impl<'a> ShaderFromNir<'a> {
if alu.def.bit_size() == 64 {
// For 64-bit shifts, we have to use clamp mode so we need
// to mask the shift in order satisfy NIR semantics.
let shift = b.lop2(
LogicOp3::new_lut(&|x, y, _| x & y),
shift,
0x3f.into(),
);
let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
let dst = b.alloc_ssa(RegFile::GPR, 2);
b.push_op(OpShf {
dst: dst[0].into(),
@ -991,9 +976,7 @@ impl<'a> ShaderFromNir<'a> {
dst
}
}
nir_op_ixor => {
b.lop2(LogicOp3::new_lut(&|x, y, _| x ^ y), srcs[0], srcs[1])
}
nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]),
nir_op_pack_half_2x16_split => {
assert!(alu.get_src(0).bit_size() == 32);
let low = b.alloc_ssa(RegFile::GPR, 1);
@ -1163,11 +1146,7 @@ impl<'a> ShaderFromNir<'a> {
if alu.def.bit_size() == 64 {
// For 64-bit shifts, we have to use clamp mode so we need
// to mask the shift in order satisfy NIR semantics.
let shift = b.lop2(
LogicOp3::new_lut(&|x, y, _| x & y),
shift,
0x3f.into(),
);
let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
let dst = b.alloc_ssa(RegFile::GPR, 2);
b.push_op(OpShf {
dst: dst[0].into(),

View file

@ -1452,6 +1452,36 @@ impl fmt::Display for IntCmpType {
}
}
/// A logic operation on two inputs.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LogicOp2 {
    /// Bitwise AND of the two inputs.
    And,
    /// Bitwise OR of the two inputs.
    Or,
    /// Bitwise XOR of the two inputs.
    Xor,
    /// Passes the second input through unchanged.
    PassB,
}

impl fmt::Display for LogicOp2 {
    /// Writes the lowercase mnemonic for the operation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match *self {
            Self::And => "and",
            Self::Or => "or",
            Self::Xor => "xor",
            Self::PassB => "pass_b",
        };
        f.write_str(mnemonic)
    }
}
impl LogicOp2 {
    /// Converts this two-input operation into the equivalent `LogicOp3`
    /// lookup table. The third input is ignored in every case.
    pub fn to_lut(self) -> LogicOp3 {
        match self {
            Self::And => LogicOp3::new_lut(&|lhs, rhs, _| lhs & rhs),
            Self::Or => LogicOp3::new_lut(&|lhs, rhs, _| lhs | rhs),
            Self::Xor => LogicOp3::new_lut(&|lhs, rhs, _| lhs ^ rhs),
            // Only the second input contributes to the result.
            Self::PassB => LogicOp3::new_lut(&|_, rhs, _| rhs),
        }
    }
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp3 {
pub lut: u8,
@ -2597,6 +2627,23 @@ impl DisplayOp for OpISetP {
}
impl_display_for_op!(OpISetP);
// Two-source logic instruction: applies `op` to `srcs[0]` and `srcs[1]` and
// writes the result to `dst`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpLop2 {
// Destination of the result.
pub dst: Dst,
// The two inputs, both tagged as ALU sources.
#[src_type(ALU)]
pub srcs: [Src; 2],
// Which two-input logic operation to apply.
pub op: LogicOp2,
}
impl DisplayOp for OpLop2 {
    /// Writes the op as `lop2.<op> <src0> <src1>`.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let [lhs, rhs] = &self.srcs;
        write!(f, "lop2.{} {lhs} {rhs}", self.op)
    }
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpLop3 {
@ -2997,6 +3044,27 @@ impl DisplayOp for OpPLop3 {
}
impl_display_for_op!(OpPLop3);
// Predicate set instruction: combines three predicate sources using two
// predicate set ops and writes up to two predicate destinations.
// NOTE(review): presumably dsts[0] = (srcs[0] ops[0] srcs[1]) ops[1] srcs[2];
// confirm against the hardware documentation.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpPSetP {
// The two predicate destinations.
pub dsts: [Dst; 2],
// The two predicate set ops used to combine the sources.
pub ops: [PredSetOp; 2],
// The three inputs, all tagged as predicate sources.
#[src_type(Pred)]
pub srcs: [Src; 3],
}
impl DisplayOp for OpPSetP {
    /// Writes the op as `psetp<op0><op1> <src0> <src1> <src2>`.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let [first_op, second_op] = &self.ops;
        let [a, b, c] = &self.srcs;
        write!(f, "psetp{first_op}{second_op} {a} {b} {c}")
    }
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpPopC {
@ -4346,6 +4414,7 @@ pub enum Op {
IMad64(OpIMad64),
IMnMx(OpIMnMx),
ISetP(OpISetP),
Lop2(OpLop2),
Lop3(OpLop3),
PopC(OpPopC),
Shf(OpShf),
@ -4358,6 +4427,7 @@ pub enum Op {
Sel(OpSel),
Shfl(OpShfl),
PLop3(OpPLop3),
PSetP(OpPSetP),
Tex(OpTex),
Tld(OpTld),
Tld4(OpTld4),
@ -4779,6 +4849,7 @@ impl Instr {
| Op::IMad64(_)
| Op::IMnMx(_)
| Op::ISetP(_)
| Op::Lop2(_)
| Op::Lop3(_)
| Op::Shf(_) => true,
@ -4790,7 +4861,7 @@ impl Instr {
Op::Shfl(_) => false,
// Predicate ops
Op::PLop3(_) => true,
Op::PLop3(_) | Op::PSetP(_) => true,
// Texture ops
Op::Tex(_)

View file

@ -77,7 +77,7 @@ impl LowerCopySwap {
SrcRef::True => {
b.lop2_to(
copy.dst,
LogicOp3::new_const(true),
LogicOp2::PassB,
Src::new_imm_bool(true),
Src::new_imm_bool(true),
);
@ -85,18 +85,18 @@ impl LowerCopySwap {
SrcRef::False => {
b.lop2_to(
copy.dst,
LogicOp3::new_const(false),
Src::new_imm_bool(true),
LogicOp2::PassB,
Src::new_imm_bool(true),
Src::new_imm_bool(false),
);
}
SrcRef::Reg(src_reg) => match src_reg.file() {
RegFile::Pred => {
b.lop2_to(
copy.dst,
LogicOp3::new_lut(&|x, _, _| x),
copy.src,
LogicOp2::PassB,
Src::new_imm_bool(true),
copy.src,
);
}
_ => panic!("Cannot copy to Pred"),
@ -157,6 +157,7 @@ impl LowerCopySwap {
if x == y {
/* Nothing to do */
} else if x.is_predicate() {
// TODO: Transform this into PLOP2 for SM5x-SM6x
b.push_op(OpPLop3 {
dsts: [x.into(), y.into()],
srcs: [x.into(), y.into(), Src::new_imm_bool(true)],
@ -166,10 +167,9 @@ impl LowerCopySwap {
],
});
} else {
let xor = LogicOp3::new_lut(&|x, y, _| x ^ y);
b.lop2_to(x.into(), xor, x.into(), y.into());
b.lop2_to(y.into(), xor, x.into(), y.into());
b.lop2_to(x.into(), xor, x.into(), y.into());
b.lop2_to(x.into(), LogicOp2::Xor, x.into(), y.into());
b.lop2_to(y.into(), LogicOp2::Xor, x.into(), y.into());
b.lop2_to(x.into(), LogicOp2::Xor, x.into(), y.into());
}
}