nak: implement IMUL for SM50

IMAD64 does not exist on SM50, so we use IMUL instead for
nir_op_{i,u}mul_high and nir_op_{i,u}mul_2x32_64. Longer-term we may want
to replace this with XMAD for better perf.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26114>
This commit is contained in:
Benjamin Lee 2023-10-24 19:10:23 -07:00 committed by Marge Bot
parent 00be041ffc
commit ebfd651361
5 changed files with 121 additions and 19 deletions

View file

@ -300,11 +300,45 @@ pub trait SSABuilder: Builder {
/// Emits a 32-bit integer multiply of `x` and `y`.
///
/// Returns the freshly allocated single-component GPR SSA value that
/// receives the result.
///
/// On SM70 and newer this is an `OpIMad` with a zero addend.  Older
/// hardware gets an `OpIMul` producing the low half (`high: false`) of the
/// product, since `OpIMul` is only used on SM50.
fn imul(&mut self, x: Src, y: Src) -> SSARef {
    let dst = self.alloc_ssa(RegFile::GPR, 1);
    // The boundary is inclusive: SM70 itself must take the IMAD path,
    // because IMUL is only encoded by the SM50 backend.
    if self.sm() >= 70 {
        self.push_op(OpIMad {
            dst: dst.into(),
            srcs: [x, y, 0.into()],
            signed: false,
        });
    } else {
        self.push_op(OpIMul {
            dst: dst[0].into(),
            srcs: [x, y],
            signed: [false; 2],
            high: false,
        });
    }
    dst
}
/// Emits a 32x32 -> 64-bit integer multiply of `x` and `y`.
///
/// `signed` applies to both operands.  Returns a two-component GPR SSA
/// value; on the pre-SM70 path component 0 is written by the low-half
/// multiply and component 1 by the high-half multiply.
///
/// On SM70 and newer this is a single `OpIMad64` with a zero addend.
/// Older hardware has no IMAD64, so the two halves are produced by a pair
/// of `OpIMul` instructions (`high: false` and `high: true`).
fn imul_2x32_64(&mut self, x: Src, y: Src, signed: bool) -> SSARef {
    let dst = self.alloc_ssa(RegFile::GPR, 2);
    // The boundary is inclusive: SM70 itself must take the IMAD64 path,
    // because IMUL is only encoded by the SM50 backend.
    if self.sm() >= 70 {
        self.push_op(OpIMad64 {
            dst: dst.into(),
            srcs: [x, y, 0.into()],
            signed,
        });
    } else {
        // Low 32 bits of the product.
        self.push_op(OpIMul {
            dst: dst[0].into(),
            srcs: [x, y],
            signed: [signed; 2],
            high: false,
        });
        // High 32 bits of the product.
        self.push_op(OpIMul {
            dst: dst[1].into(),
            srcs: [x, y],
            signed: [signed; 2],
            high: true,
        });
    }
    dst
}

View file

@ -712,6 +712,43 @@ impl SM50Instr {
self.set_dst(op.dst);
}
/// Encodes an `OpIMul` into this SM50 instruction word.
///
/// Neither source may carry a source modifier.  The first source is always
/// written as a register in bits 8..16; the second source selects the
/// instruction form (32-bit immediate, register/zero, 20-bit immediate or
/// constant buffer).  The `high` flag and the two per-source signedness
/// flags occupy three consecutive bits whose position depends on the form.
///
/// # Panics
///
/// Panics if either source has a modifier or if the second source is of a
/// kind that none of the forms can encode.
fn encode_imul(&mut self, op: &OpIMul) {
    let [lhs, rhs] = &op.srcs;
    assert!(lhs.src_mod.is_none());
    assert!(rhs.src_mod.is_none());

    self.set_dst(op.dst);
    self.set_reg_src(8..16, *lhs);

    // First of the three consecutive flag bits (high, signed[0], signed[1]).
    let flag_base = if let Some(imm) = rhs.as_imm_not_i20() {
        // Full 32-bit immediate form.
        self.set_opcode(0x1fc0);
        self.set_src_imm32(20..52, imm);
        53
    } else {
        match rhs.src_ref {
            SrcRef::Zero | SrcRef::Reg(_) => {
                self.set_opcode(0x5c38);
                self.set_reg_src(20..28, *rhs);
            }
            SrcRef::Imm32(i) => {
                self.set_opcode(0x3838);
                self.set_src_imm_i20(20..39, 56, i);
            }
            SrcRef::CBuf(cb) => {
                self.set_opcode(0x4c38);
                self.set_src_cb(20..39, &cb);
            }
            src1 => panic!("unsupported src1 type for IMUL: {src1}"),
        }
        39
    };

    self.set_bit(flag_base, op.high);
    self.set_bit(flag_base + 1, op.signed[0]);
    self.set_bit(flag_base + 2, op.signed[1]);
}
fn encode_f2i(&mut self, op: &OpF2I) {
match &op.src.src_ref {
SrcRef::Zero | SrcRef::Reg(_) => {
@ -1656,6 +1693,7 @@ impl SM50Instr {
Op::I2F(op) => si.encode_i2f(&op),
Op::FRnd(op) => si.encode_frnd(&op),
Op::IMad(op) => si.encode_imad(&op),
Op::IMul(op) => si.encode_imul(&op),
Op::IMnMx(op) => si.encode_imnmx(&op),
Op::ISetP(op) => si.encode_isetp(&op),
Op::Tex(op) => si.encode_tex(&op),

View file

@ -810,22 +810,13 @@ impl<'a> ShaderFromNir<'a> {
b.imul(srcs[0], srcs[1])
}
nir_op_imul_2x32_64 | nir_op_umul_2x32_64 => {
let dst = b.alloc_ssa(RegFile::GPR, 2);
b.push_op(OpIMad64 {
dst: dst.into(),
srcs: [srcs[0], srcs[1], 0.into()],
signed: alu.op == nir_op_imul_2x32_64,
});
dst
let signed = alu.op == nir_op_imul_2x32_64;
b.imul_2x32_64(srcs[0], srcs[1], signed)
}
nir_op_imul_high | nir_op_umul_high => {
let dst = b.alloc_ssa(RegFile::GPR, 2);
b.push_op(OpIMad64 {
dst: dst.into(),
srcs: [srcs[0], srcs[1], 0.into()],
signed: alu.op == nir_op_imul_high,
});
dst[1].into()
let signed = alu.op == nir_op_imul_high;
let dst64 = b.imul_2x32_64(srcs[0], srcs[1], signed);
dst64[1].into()
}
nir_op_ine => {
if alu.get_src(0).bit_size() == 1 {

View file

@ -2630,6 +2630,36 @@ impl DisplayOp for OpIMad {
}
impl_display_for_op!(OpIMad);
/// Integer multiply of two sources, yielding either the low or the high
/// half of the product.
///
/// Only used on SM50
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMul {
/// Destination for the half of the product selected by `high`.
pub dst: Dst,
/// The two multiplicands.
#[src_type(ALU)]
pub srcs: [Src; 2],
/// Per-source signedness: `signed[i]` describes `srcs[i]` and is printed
/// as `.s32` when true and `.u32` when false.
pub signed: [bool; 2],
/// When set, the op is printed with a `.hi` suffix; the builder sets it
/// for the upper component of a 64-bit product.
pub high: bool,
}
impl DisplayOp for OpIMul {
    /// Writes the op as `imul[.hi]<type0><type1> <src0> <src1>`, where each
    /// type suffix is `.s32` for a signed source and `.u32` otherwise.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fn type_suffix(is_signed: bool) -> &'static str {
            if is_signed {
                ".s32"
            } else {
                ".u32"
            }
        }

        let hi_suffix = if self.high { ".hi" } else { "" };
        let ty0 = type_suffix(self.signed[0]);
        let ty1 = type_suffix(self.signed[1]);
        let [lhs, rhs] = &self.srcs;

        write!(f, "imul{hi_suffix}{ty0}{ty1} {lhs} {rhs}")
    }
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMad64 {
@ -4559,6 +4589,7 @@ pub enum Op {
IDp4(OpIDp4),
IMad(OpIMad),
IMad64(OpIMad64),
IMul(OpIMul),
IMnMx(OpIMnMx),
ISetP(OpISetP),
Lop2(OpLop2),
@ -4997,6 +5028,7 @@ impl Instr {
| Op::IDp4(_)
| Op::IMad(_)
| Op::IMad64(_)
| Op::IMul(_)
| Op::IMnMx(_)
| Op::ISetP(_)
| Op::Lop2(_)

View file

@ -154,6 +154,13 @@ fn legalize_sm50_instr(
copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
copy_src_if_not_reg(b, &mut op.srcs[2], RegFile::GPR);
}
Op::IMul(op) => {
// Legalize IMUL sources: the SM50 encoder always writes src0 as a
// register, while src1 may take other forms.
let [ref mut src0, ref mut src1] = op.srcs;
// NOTE(review): presumably swap_srcs_if_not_reg exchanges the two sources
// when src0 is not a register and returns whether it did so; confirm
// against the helper.
if swap_srcs_if_not_reg(src0, src1) {
// The signedness flags are per-source, so they must follow their
// operands when the sources trade places.
op.signed.swap(0, 1);
}
// If src0 still is not a GPR after the optional swap, fall back to
// copying it (as the helper name suggests).
copy_src_if_not_reg(b, src0, RegFile::GPR);
}
Op::F2I(op) => {
copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
}