mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-21 14:10:37 +02:00
nak: implement IMUL for SM50
IMAD64 does not exist on SM50, so we're using IMUL instead for
nir_op_{i,u}mul_high and nir_op_{i,u}mul_2x32_64. Longer-term we may want
to replace this with XMAD for better perf.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26114>
This commit is contained in:
parent
00be041ffc
commit
ebfd651361
5 changed files with 121 additions and 19 deletions
|
|
@ -300,11 +300,45 @@ pub trait SSABuilder: Builder {
|
|||
|
||||
/// Builds a 32-bit integer multiply of `x` and `y` and returns its result.
///
/// `dst` is allocated from `RegFile::GPR` with a component count of 1 and
/// is returned to the caller after the multiply op has been pushed.
/// When `self.sm() > 70` the multiply is an `OpIMad` whose third source is
/// the constant 0; otherwise it is an `OpIMul` with `high: false` and both
/// sources marked unsigned.
fn imul(&mut self, x: Src, y: Src) -> SSARef {
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 1);
|
||||
// NOTE(review): this unconditional OpIMad push is identical to the
// `sm() > 70` arm below. It looks like the pre-change side of the diff
// hunk rather than live code -- confirm against the actual file.
self.push_op(OpIMad {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y, 0.into()],
|
||||
signed: false,
|
||||
});
|
||||
// NOTE(review): `> 70` sends sm == 70 down the OpIMul arm, while OpIMul
// is described as SM50-only in this change -- confirm `>= 70` was not
// intended.
if self.sm() > 70 {
|
||||
// Multiply expressed as a multiply-add with a zero addend.
self.push_op(OpIMad {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y, 0.into()],
|
||||
signed: false,
|
||||
});
|
||||
} else {
|
||||
// Plain multiply: both sources unsigned, `high` not requested.
self.push_op(OpIMul {
|
||||
dst: dst[0].into(),
|
||||
srcs: [x, y],
|
||||
signed: [false; 2],
|
||||
high: false,
|
||||
});
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
/// Builds a multiply of `x` and `y` whose result occupies two GPR
/// components, and returns that two-component value.
///
/// `signed` is forwarded to the emitted op(s); on the `OpIMul` path the
/// same flag is applied to both sources.
///
/// When `self.sm() > 70` a single `OpIMad64` with a zero third source
/// writes the whole of `dst`. Otherwise two `OpIMul` ops are pushed:
/// one with `high: false` writing `dst[0]` and one with `high: true`
/// writing `dst[1]`.
fn imul_2x32_64(&mut self, x: Src, y: Src, signed: bool) -> SSARef {
|
||||
let dst = self.alloc_ssa(RegFile::GPR, 2);
|
||||
// NOTE(review): `> 70` sends sm == 70 down the OpIMul arm, while OpIMul
// is described as SM50-only in this change -- confirm `>= 70` was not
// intended.
if self.sm() > 70 {
|
||||
self.push_op(OpIMad64 {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y, 0.into()],
|
||||
signed,
|
||||
});
|
||||
} else {
|
||||
// First component: `high: false` multiply.
self.push_op(OpIMul {
|
||||
dst: dst[0].into(),
|
||||
srcs: [x, y],
|
||||
signed: [signed; 2],
|
||||
high: false,
|
||||
});
|
||||
// Second component: same sources, `high: true` multiply.
self.push_op(OpIMul {
|
||||
dst: dst[1].into(),
|
||||
srcs: [x, y],
|
||||
signed: [signed; 2],
|
||||
high: true,
|
||||
});
|
||||
}
|
||||
dst
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -712,6 +712,43 @@ impl SM50Instr {
|
|||
self.set_dst(op.dst);
|
||||
}
|
||||
|
||||
/// Encodes an `OpIMul` into this instruction.
///
/// The destination and source 0 (register field at bits 8..16) are
/// always written. Source 1 selects one of two layouts:
///
/// * if `as_imm_not_i20()` yields a value: opcode `0x1fc0`, a 32-bit
///   immediate at bits 20..52, `high` at bit 53 and the two `signed`
///   flags at bits 54 and 55;
/// * otherwise: opcode `0x5c38` (zero/register), `0x3838` (immediate via
///   `set_src_imm_i20`) or `0x4c38` (constant buffer), with `high` at
///   bit 39 and the two `signed` flags at bits 40 and 41.
///
/// # Panics
///
/// Panics if either source carries a source modifier, or if source 1 is
/// of a kind other than zero, register, 32-bit immediate or constant
/// buffer.
fn encode_imul(&mut self, op: &OpIMul) {
|
||||
assert!(op.srcs[0].src_mod.is_none());
|
||||
assert!(op.srcs[1].src_mod.is_none());
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_src(8..16, op.srcs[0]);
|
||||
|
||||
// NOTE(review): presumably `as_imm_not_i20` returns only immediates that
// do not fit the 20-bit form handled in the `else` branch -- the helper is
// not visible here, confirm.
if let Some(i) = op.srcs[1].as_imm_not_i20() {
|
||||
self.set_opcode(0x1fc0);
|
||||
self.set_src_imm32(20..52, i);
|
||||
|
||||
// The flag bits sit at 53..=55 in this layout.
self.set_bit(53, op.high);
|
||||
self.set_bit(54, op.signed[0]);
|
||||
self.set_bit(55, op.signed[1]);
|
||||
} else {
|
||||
match op.srcs[1].src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c38);
|
||||
self.set_reg_src(20..28, op.srcs[1]);
|
||||
}
|
||||
SrcRef::Imm32(i) => {
|
||||
self.set_opcode(0x3838);
|
||||
self.set_src_imm_i20(20..39, 56, i);
|
||||
}
|
||||
SrcRef::CBuf(cb) => {
|
||||
self.set_opcode(0x4c38);
|
||||
self.set_src_cb(20..39, &cb);
|
||||
}
|
||||
src1 => panic!("unsupported src1 type for IMUL: {src1}"),
|
||||
};
|
||||
|
||||
// The flag bits sit at 39..=41 for all three non-imm32 layouts.
self.set_bit(39, op.high);
|
||||
self.set_bit(40, op.signed[0]);
|
||||
self.set_bit(41, op.signed[1]);
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_f2i(&mut self, op: &OpF2I) {
|
||||
match &op.src.src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
|
|
@ -1656,6 +1693,7 @@ impl SM50Instr {
|
|||
Op::I2F(op) => si.encode_i2f(&op),
|
||||
Op::FRnd(op) => si.encode_frnd(&op),
|
||||
Op::IMad(op) => si.encode_imad(&op),
|
||||
Op::IMul(op) => si.encode_imul(&op),
|
||||
Op::IMnMx(op) => si.encode_imnmx(&op),
|
||||
Op::ISetP(op) => si.encode_isetp(&op),
|
||||
Op::Tex(op) => si.encode_tex(&op),
|
||||
|
|
|
|||
|
|
@ -810,22 +810,13 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.imul(srcs[0], srcs[1])
|
||||
}
|
||||
nir_op_imul_2x32_64 | nir_op_umul_2x32_64 => {
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 2);
|
||||
b.push_op(OpIMad64 {
|
||||
dst: dst.into(),
|
||||
srcs: [srcs[0], srcs[1], 0.into()],
|
||||
signed: alu.op == nir_op_imul_2x32_64,
|
||||
});
|
||||
dst
|
||||
let signed = alu.op == nir_op_imul_2x32_64;
|
||||
b.imul_2x32_64(srcs[0], srcs[1], signed)
|
||||
}
|
||||
nir_op_imul_high | nir_op_umul_high => {
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 2);
|
||||
b.push_op(OpIMad64 {
|
||||
dst: dst.into(),
|
||||
srcs: [srcs[0], srcs[1], 0.into()],
|
||||
signed: alu.op == nir_op_imul_high,
|
||||
});
|
||||
dst[1].into()
|
||||
let signed = alu.op == nir_op_imul_high;
|
||||
let dst64 = b.imul_2x32_64(srcs[0], srcs[1], signed);
|
||||
dst64[1].into()
|
||||
}
|
||||
nir_op_ine => {
|
||||
if alu.get_src(0).bit_size() == 1 {
|
||||
|
|
|
|||
|
|
@ -2630,6 +2630,36 @@ impl DisplayOp for OpIMad {
|
|||
}
|
||||
impl_display_for_op!(OpIMad);
|
||||
|
||||
/// Two-source integer multiply. Only used on SM50.
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIMul {
|
||||
/// Destination of the multiply.
pub dst: Dst,
|
||||
|
||||
/// The two multiplicands.
#[src_type(ALU)]
|
||||
pub srcs: [Src; 2],
|
||||
|
||||
/// Per-source signedness: `signed[i]` applies to `srcs[i]` and is
/// displayed as `.s32` when set, `.u32` otherwise.
pub signed: [bool; 2],
|
||||
/// Displayed as the `.hi` suffix. The builder sets this for the op that
/// writes component 1 of a two-component product and clears it for
/// component 0 (presumably the upper and lower 32 bits -- confirm).
pub high: bool,
|
||||
}
|
||||
|
||||
// NOTE(review): unlike OpIMad, no `impl_display_for_op!(OpIMul);` follows
// this impl in the visible hunk -- confirm whether that is intentional.
impl DisplayOp for OpIMul {
|
||||
/// Writes the mnemonic and sources, e.g. `imul.hi.s32.u32 <src0> <src1>`:
/// `imul`, then `.hi` when `high` is set, then one `.s32`/`.u32` suffix
/// per source, then the two sources separated by spaces. The destination
/// is not written here.
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "imul")?;
|
||||
if self.high {
|
||||
write!(f, ".hi")?;
|
||||
}
|
||||
// Maps a signedness flag to its type suffix.
let src_type = |signed| if signed { ".s32" } else { ".u32" };
|
||||
write!(
|
||||
f,
|
||||
"{}{}",
|
||||
src_type(self.signed[0]),
|
||||
src_type(self.signed[1])
|
||||
)?;
|
||||
write!(f, " {} {}", self.srcs[0], self.srcs[1])
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpIMad64 {
|
||||
|
|
@ -4559,6 +4589,7 @@ pub enum Op {
|
|||
IDp4(OpIDp4),
|
||||
IMad(OpIMad),
|
||||
IMad64(OpIMad64),
|
||||
IMul(OpIMul),
|
||||
IMnMx(OpIMnMx),
|
||||
ISetP(OpISetP),
|
||||
Lop2(OpLop2),
|
||||
|
|
@ -4997,6 +5028,7 @@ impl Instr {
|
|||
| Op::IDp4(_)
|
||||
| Op::IMad(_)
|
||||
| Op::IMad64(_)
|
||||
| Op::IMul(_)
|
||||
| Op::IMnMx(_)
|
||||
| Op::ISetP(_)
|
||||
| Op::Lop2(_)
|
||||
|
|
|
|||
|
|
@ -154,6 +154,13 @@ fn legalize_sm50_instr(
|
|||
copy_src_if_not_reg(b, &mut op.srcs[1], RegFile::GPR);
|
||||
copy_src_if_not_reg(b, &mut op.srcs[2], RegFile::GPR);
|
||||
}
|
||||
Op::IMul(op) => {
|
||||
let [ref mut src0, ref mut src1] = op.srcs;
|
||||
if swap_srcs_if_not_reg(src0, src1) {
|
||||
op.signed.swap(0, 1);
|
||||
}
|
||||
copy_src_if_not_reg(b, src0, RegFile::GPR);
|
||||
}
|
||||
Op::F2I(op) => {
|
||||
copy_src_if_not_reg(b, &mut op.src, RegFile::GPR);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue