mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
nak: Use OpLop2 and OpPSetP pre-SM70
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26114>
This commit is contained in:
parent
36e80caac9
commit
e404877a02
5 changed files with 220 additions and 60 deletions
|
|
@ -24,10 +24,7 @@ pub trait Builder {
|
|||
}
|
||||
}
|
||||
|
||||
fn lop2_to(&mut self, dst: Dst, op: LogicOp3, x: Src, y: Src) {
|
||||
/* Only uses x and y */
|
||||
assert!(!op.src_used(2));
|
||||
|
||||
fn lop2_to(&mut self, dst: Dst, op: LogicOp2, mut x: Src, mut y: Src) {
|
||||
let is_predicate = match dst {
|
||||
Dst::None => panic!("No LOP destination"),
|
||||
Dst::SSA(ssa) => ssa.is_predicate(),
|
||||
|
|
@ -36,18 +33,54 @@ pub trait Builder {
|
|||
assert!(x.is_predicate() == is_predicate);
|
||||
assert!(y.is_predicate() == is_predicate);
|
||||
|
||||
if is_predicate {
|
||||
self.push_op(OpPLop3 {
|
||||
dsts: [dst.into(), Dst::None],
|
||||
srcs: [x, y, Src::new_imm_bool(true)],
|
||||
ops: [op, LogicOp3::new_const(false)],
|
||||
});
|
||||
if self.sm() >= 70 {
|
||||
let mut op = op.to_lut();
|
||||
if x.src_mod.is_bnot() {
|
||||
op = LogicOp3::new_lut(&|x, y, _| op.eval(!x, y, 0));
|
||||
x.src_mod = SrcMod::None;
|
||||
}
|
||||
if y.src_mod.is_bnot() {
|
||||
op = LogicOp3::new_lut(&|x, y, _| op.eval(x, !y, 0));
|
||||
y.src_mod = SrcMod::None;
|
||||
}
|
||||
if is_predicate {
|
||||
self.push_op(OpPLop3 {
|
||||
dsts: [dst.into(), Dst::None],
|
||||
srcs: [x, y, true.into()],
|
||||
ops: [op, LogicOp3::new_const(false)],
|
||||
});
|
||||
} else {
|
||||
self.push_op(OpLop3 {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y, 0.into()],
|
||||
op: op,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
self.push_op(OpLop3 {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y, Src::new_zero()],
|
||||
op: op,
|
||||
});
|
||||
if is_predicate {
|
||||
let mut x = x;
|
||||
let cmp_op = match op {
|
||||
LogicOp2::And => PredSetOp::And,
|
||||
LogicOp2::Or => PredSetOp::Or,
|
||||
LogicOp2::Xor => PredSetOp::Xor,
|
||||
LogicOp2::PassB => {
|
||||
// Pass through B by AND with PT
|
||||
x = true.into();
|
||||
PredSetOp::And
|
||||
}
|
||||
};
|
||||
self.push_op(OpPSetP {
|
||||
dsts: [dst.into(), Dst::None],
|
||||
ops: [cmp_op, PredSetOp::And],
|
||||
srcs: [x, y, true.into()],
|
||||
});
|
||||
} else {
|
||||
self.push_op(OpLop2 {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y],
|
||||
op: op,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -259,7 +292,7 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn lop2(&mut self, op: LogicOp3, x: Src, y: Src) -> SSARef {
|
||||
fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
|
||||
let dst = if x.is_predicate() {
|
||||
self.alloc_ssa(RegFile::Pred, 1)
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -315,15 +315,19 @@ impl SM50Instr {
|
|||
self.set_field(range, reg.base_idx());
|
||||
}
|
||||
|
||||
fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
|
||||
assert!(src.src_mod.is_none());
|
||||
match src.src_ref {
|
||||
fn set_reg_src_ref(&mut self, range: Range<usize>, src_ref: SrcRef) {
|
||||
match src_ref {
|
||||
SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)),
|
||||
SrcRef::Reg(reg) => self.set_reg(range, reg),
|
||||
_ => panic!("Not a register"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes an unmodified register source into the bit field `range`.
///
/// This is the strict variant of `set_reg_src_ref`: it refuses sources
/// that carry a modifier, because nothing here encodes one.
///
/// # Panics
///
/// Panics if `src.src_mod` is not none, or if the source reference is not
/// a register (see `set_reg_src_ref`).
fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
    let modifier = src.src_mod;
    assert!(modifier.is_none());

    let src_ref = src.src_ref;
    self.set_reg_src_ref(range, src_ref);
}
|
||||
|
||||
fn set_pred_dst(&mut self, range: Range<usize>, dst: Dst) {
|
||||
match dst {
|
||||
Dst::None => {
|
||||
|
|
@ -631,6 +635,20 @@ impl SM50Instr {
|
|||
self.set_pred_src(39..42, 42, op.cond);
|
||||
}
|
||||
|
||||
/// Encodes an `OpPSetP` instruction (opcode 0x5090).
///
/// Field layout written by this function:
/// - bits 3..6  : first predicate destination (`dsts[0]`)
/// - bits 0..3  : second predicate destination (`dsts[1]`)
/// - bits 12..15 (+ bit 15): first predicate source
/// - bits 29..32 (+ bit 32): second predicate source
/// - bits 39..42 (+ bit 42): third predicate source
/// - bits 24..26: first set operation (`ops[0]`)
/// - bits 45..47: second set operation (`ops[1]`)
fn encode_psetp(&mut self, op: &OpPSetP) {
    self.set_opcode(0x5090);

    // Destinations: dst0 sits above dst1 in the encoding.
    let [dst0, dst1] = op.dsts;
    self.set_pred_dst(3..6, dst0);
    self.set_pred_dst(0..3, dst1);

    // Each predicate source is a register field plus one extra bit
    // directly above it.
    let [src0, src1, src2] = op.srcs;
    self.set_pred_src(12..15, 15, src0);
    self.set_pred_src(29..32, 32, src1);
    self.set_pred_src(39..42, 42, src2);

    let [set_op0, set_op1] = op.ops;
    self.set_pred_set_op(24..26, set_op0);
    self.set_pred_set_op(45..47, set_op1);
}
|
||||
|
||||
fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) {
|
||||
assert!(range.len() == 3);
|
||||
self.set_field(
|
||||
|
|
@ -717,6 +735,63 @@ impl SM50Instr {
|
|||
}
|
||||
}
|
||||
|
||||
fn encode_lop2(&mut self, op: &OpLop2) {
|
||||
if let Some(imm32) = op.srcs[1].as_imm_not_i20() {
|
||||
self.set_opcode(0x0400);
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
|
||||
self.set_bit(55, op.srcs[0].src_mod.is_bnot());
|
||||
self.set_src_imm32(20..52, imm32);
|
||||
|
||||
self.set_field(
|
||||
53..55,
|
||||
match op.op {
|
||||
LogicOp2::And => 0_u8,
|
||||
LogicOp2::Or => 1_u8,
|
||||
LogicOp2::Xor => 2_u8,
|
||||
LogicOp2::PassB => {
|
||||
panic!("PASS_B is not supported for LOP32I");
|
||||
}
|
||||
},
|
||||
);
|
||||
} else {
|
||||
match &op.srcs[1].src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c40);
|
||||
self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
|
||||
}
|
||||
SrcRef::Imm32(i) => {
|
||||
self.set_opcode(0x3840);
|
||||
self.set_src_imm_i20(20..39, 56, *i);
|
||||
}
|
||||
SrcRef::CBuf(cb) => {
|
||||
self.set_opcode(0x4c40);
|
||||
self.set_src_cb(20..39, cb);
|
||||
}
|
||||
src1 => panic!("unsupported src1 type for IMUL: {src1}"),
|
||||
}
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
|
||||
|
||||
self.set_bit(39, op.srcs[0].src_mod.is_bnot());
|
||||
self.set_bit(40, op.srcs[1].src_mod.is_bnot());
|
||||
|
||||
self.set_field(
|
||||
41..43,
|
||||
match op.op {
|
||||
LogicOp2::And => 0_u8,
|
||||
LogicOp2::Or => 1_u8,
|
||||
LogicOp2::Xor => 2_u8,
|
||||
LogicOp2::PassB => 3_u8,
|
||||
},
|
||||
);
|
||||
|
||||
self.set_pred_dst(48..51, Dst::None);
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_shf(&mut self, op: &OpShf) {
|
||||
/* TODO: This should happen as part of a legalization pass */
|
||||
assert!(op.shift.is_reg_or_zero());
|
||||
|
|
@ -1645,12 +1720,14 @@ impl SM50Instr {
|
|||
Op::IAdd3(op) => si.encode_iadd3(&op),
|
||||
Op::Mov(op) => si.encode_mov(&op),
|
||||
Op::Sel(op) => si.encode_sel(&op),
|
||||
Op::PSetP(op) => si.encode_psetp(&op),
|
||||
Op::SuSt(op) => si.encode_sust(&op),
|
||||
Op::S2R(op) => si.encode_s2r(&op),
|
||||
Op::PopC(op) => si.encode_popc(&op),
|
||||
Op::Brev(op) => si.encode_brev(&op),
|
||||
Op::Prmt(op) => si.encode_prmt(&op),
|
||||
Op::Ld(op) => si.encode_ld(&op),
|
||||
Op::Lop2(op) => si.encode_lop2(&op),
|
||||
Op::Shf(op) => si.encode_shf(&op),
|
||||
Op::F2F(op) => si.encode_f2f(&op),
|
||||
Op::F2I(op) => si.encode_f2i(&op),
|
||||
|
|
|
|||
|
|
@ -774,13 +774,10 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.iadd(srcs[0], srcs[1])
|
||||
}
|
||||
}
|
||||
nir_op_iand => {
|
||||
b.lop2(LogicOp3::new_lut(&|x, y, _| x & y), srcs[0], srcs[1])
|
||||
}
|
||||
nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]),
|
||||
nir_op_ieq => {
|
||||
if alu.get_src(0).bit_size() == 1 {
|
||||
let lop = LogicOp3::new_lut(&|x, y, _| !(x ^ y));
|
||||
b.lop2(lop, srcs[0], srcs[1])
|
||||
b.lop2(LogicOp2::Xor, srcs[0], srcs[1].bnot())
|
||||
} else if alu.get_src(0).bit_size() == 64 {
|
||||
b.isetp64(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
|
||||
} else {
|
||||
|
|
@ -850,8 +847,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
nir_op_ine => {
|
||||
if alu.get_src(0).bit_size() == 1 {
|
||||
let lop = LogicOp3::new_lut(&|x, y, _| x ^ y);
|
||||
b.lop2(lop, srcs[0], srcs[1])
|
||||
b.lop2(LogicOp2::Xor, srcs[0], srcs[1])
|
||||
} else if alu.get_src(0).bit_size() == 64 {
|
||||
b.isetp64(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
|
||||
} else {
|
||||
|
|
@ -882,28 +878,21 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
}
|
||||
nir_op_inot => {
|
||||
let lop = LogicOp3::new_lut(&|x, _, _| !x);
|
||||
if alu.def.bit_size() == 1 {
|
||||
b.lop2(lop, srcs[0], true.into())
|
||||
b.lop2(LogicOp2::PassB, true.into(), srcs[0].bnot())
|
||||
} else {
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
b.lop2(lop, srcs[0], 0.into())
|
||||
b.lop2(LogicOp2::PassB, 0.into(), srcs[0].bnot())
|
||||
}
|
||||
}
|
||||
nir_op_ior => {
|
||||
b.lop2(LogicOp3::new_lut(&|x, y, _| x | y), srcs[0], srcs[1])
|
||||
}
|
||||
nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]),
|
||||
nir_op_ishl => {
|
||||
let x = *srcs[0].as_ssa().unwrap();
|
||||
let shift = srcs[1];
|
||||
if alu.def.bit_size() == 64 {
|
||||
// For 64-bit shifts, we have to use clamp mode so we need
|
||||
// to mask the shift in order satisfy NIR semantics.
|
||||
let shift = b.lop2(
|
||||
LogicOp3::new_lut(&|x, y, _| x & y),
|
||||
shift,
|
||||
0x3f.into(),
|
||||
);
|
||||
let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 2);
|
||||
b.push_op(OpShf {
|
||||
dst: dst[0].into(),
|
||||
|
|
@ -948,11 +937,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
if alu.def.bit_size() == 64 {
|
||||
// For 64-bit shifts, we have to use clamp mode so we need
|
||||
// to mask the shift in order satisfy NIR semantics.
|
||||
let shift = b.lop2(
|
||||
LogicOp3::new_lut(&|x, y, _| x & y),
|
||||
shift,
|
||||
0x3f.into(),
|
||||
);
|
||||
let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 2);
|
||||
b.push_op(OpShf {
|
||||
dst: dst[0].into(),
|
||||
|
|
@ -991,9 +976,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
dst
|
||||
}
|
||||
}
|
||||
nir_op_ixor => {
|
||||
b.lop2(LogicOp3::new_lut(&|x, y, _| x ^ y), srcs[0], srcs[1])
|
||||
}
|
||||
nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]),
|
||||
nir_op_pack_half_2x16_split => {
|
||||
assert!(alu.get_src(0).bit_size() == 32);
|
||||
let low = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
|
@ -1163,11 +1146,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||
if alu.def.bit_size() == 64 {
|
||||
// For 64-bit shifts, we have to use clamp mode so we need
|
||||
// to mask the shift in order satisfy NIR semantics.
|
||||
let shift = b.lop2(
|
||||
LogicOp3::new_lut(&|x, y, _| x & y),
|
||||
shift,
|
||||
0x3f.into(),
|
||||
);
|
||||
let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 2);
|
||||
b.push_op(OpShf {
|
||||
dst: dst[0].into(),
|
||||
|
|
|
|||
|
|
@ -1452,6 +1452,36 @@ impl fmt::Display for IntCmpType {
|
|||
}
|
||||
}
|
||||
|
||||
/// A two-input logic operation.
///
/// The operand names below follow `LogicOp2::to_lut`, which treats the
/// first source as `x` and the second as `y`.
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
pub enum LogicOp2 {
    /// `x & y`
    And,
    /// `x | y`
    Or,
    /// `x ^ y`
    Xor,
    /// Ignores the first source and yields the second one unchanged.
    PassB,
}
|
||||
|
||||
impl fmt::Display for LogicOp2 {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
LogicOp2::And => write!(f, "and"),
|
||||
LogicOp2::Or => write!(f, "or"),
|
||||
LogicOp2::Xor => write!(f, "xor"),
|
||||
LogicOp2::PassB => write!(f, "pass_b"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LogicOp2 {
|
||||
pub fn to_lut(self) -> LogicOp3 {
|
||||
match self {
|
||||
LogicOp2::And => LogicOp3::new_lut(&|x, y, _| x & y),
|
||||
LogicOp2::Or => LogicOp3::new_lut(&|x, y, _| x | y),
|
||||
LogicOp2::Xor => LogicOp3::new_lut(&|x, y, _| x ^ y),
|
||||
LogicOp2::PassB => LogicOp3::new_lut(&|_, b, _| b),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||
pub struct LogicOp3 {
|
||||
pub lut: u8,
|
||||
|
|
@ -2597,6 +2627,23 @@ impl DisplayOp for OpISetP {
|
|||
}
|
||||
impl_display_for_op!(OpISetP);
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpLop2 {
|
||||
pub dst: Dst,
|
||||
|
||||
#[src_type(ALU)]
|
||||
pub srcs: [Src; 2],
|
||||
|
||||
pub op: LogicOp2,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpLop2 {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "lop2.{} {} {}", self.op, self.srcs[0], self.srcs[1],)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpLop3 {
|
||||
|
|
@ -2997,6 +3044,27 @@ impl DisplayOp for OpPLop3 {
|
|||
}
|
||||
impl_display_for_op!(OpPLop3);
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpPSetP {
|
||||
pub dsts: [Dst; 2],
|
||||
|
||||
pub ops: [PredSetOp; 2],
|
||||
|
||||
#[src_type(Pred)]
|
||||
pub srcs: [Src; 3],
|
||||
}
|
||||
|
||||
impl DisplayOp for OpPSetP {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"psetp{}{} {} {} {}",
|
||||
self.ops[0], self.ops[1], self.srcs[0], self.srcs[1], self.srcs[2],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpPopC {
|
||||
|
|
@ -4346,6 +4414,7 @@ pub enum Op {
|
|||
IMad64(OpIMad64),
|
||||
IMnMx(OpIMnMx),
|
||||
ISetP(OpISetP),
|
||||
Lop2(OpLop2),
|
||||
Lop3(OpLop3),
|
||||
PopC(OpPopC),
|
||||
Shf(OpShf),
|
||||
|
|
@ -4358,6 +4427,7 @@ pub enum Op {
|
|||
Sel(OpSel),
|
||||
Shfl(OpShfl),
|
||||
PLop3(OpPLop3),
|
||||
PSetP(OpPSetP),
|
||||
Tex(OpTex),
|
||||
Tld(OpTld),
|
||||
Tld4(OpTld4),
|
||||
|
|
@ -4779,6 +4849,7 @@ impl Instr {
|
|||
| Op::IMad64(_)
|
||||
| Op::IMnMx(_)
|
||||
| Op::ISetP(_)
|
||||
| Op::Lop2(_)
|
||||
| Op::Lop3(_)
|
||||
| Op::Shf(_) => true,
|
||||
|
||||
|
|
@ -4790,7 +4861,7 @@ impl Instr {
|
|||
Op::Shfl(_) => false,
|
||||
|
||||
// Predicate ops
|
||||
Op::PLop3(_) => true,
|
||||
Op::PLop3(_) | Op::PSetP(_) => true,
|
||||
|
||||
// Texture ops
|
||||
Op::Tex(_)
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ impl LowerCopySwap {
|
|||
SrcRef::True => {
|
||||
b.lop2_to(
|
||||
copy.dst,
|
||||
LogicOp3::new_const(true),
|
||||
LogicOp2::PassB,
|
||||
Src::new_imm_bool(true),
|
||||
Src::new_imm_bool(true),
|
||||
);
|
||||
|
|
@ -85,18 +85,18 @@ impl LowerCopySwap {
|
|||
SrcRef::False => {
|
||||
b.lop2_to(
|
||||
copy.dst,
|
||||
LogicOp3::new_const(false),
|
||||
Src::new_imm_bool(true),
|
||||
LogicOp2::PassB,
|
||||
Src::new_imm_bool(true),
|
||||
Src::new_imm_bool(false),
|
||||
);
|
||||
}
|
||||
SrcRef::Reg(src_reg) => match src_reg.file() {
|
||||
RegFile::Pred => {
|
||||
b.lop2_to(
|
||||
copy.dst,
|
||||
LogicOp3::new_lut(&|x, _, _| x),
|
||||
copy.src,
|
||||
LogicOp2::PassB,
|
||||
Src::new_imm_bool(true),
|
||||
copy.src,
|
||||
);
|
||||
}
|
||||
_ => panic!("Cannot copy to Pred"),
|
||||
|
|
@ -157,6 +157,7 @@ impl LowerCopySwap {
|
|||
if x == y {
|
||||
/* Nothing to do */
|
||||
} else if x.is_predicate() {
|
||||
// TODO: Transform this in PLOP2 for SM5x-SM6x
|
||||
b.push_op(OpPLop3 {
|
||||
dsts: [x.into(), y.into()],
|
||||
srcs: [x.into(), y.into(), Src::new_imm_bool(true)],
|
||||
|
|
@ -166,10 +167,9 @@ impl LowerCopySwap {
|
|||
],
|
||||
});
|
||||
} else {
|
||||
let xor = LogicOp3::new_lut(&|x, y, _| x ^ y);
|
||||
b.lop2_to(x.into(), xor, x.into(), y.into());
|
||||
b.lop2_to(y.into(), xor, x.into(), y.into());
|
||||
b.lop2_to(x.into(), xor, x.into(), y.into());
|
||||
b.lop2_to(x.into(), LogicOp2::Xor, x.into(), y.into());
|
||||
b.lop2_to(y.into(), LogicOp2::Xor, x.into(), y.into());
|
||||
b.lop2_to(x.into(), LogicOp2::Xor, x.into(), y.into());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue