mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
nak/sm50: Add encoding and legalization for dadd/dfma/dmul/dsetp
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>
This commit is contained in:
parent
1f5623c557
commit
17d2b2f2cc
2 changed files with 202 additions and 0 deletions
|
|
@ -1626,6 +1626,130 @@ impl SM50Instr {
|
|||
);
|
||||
}
|
||||
|
||||
fn encode_dadd(&mut self, op: &OpDAdd) {
|
||||
match &op.srcs[1].src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c70);
|
||||
self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]);
|
||||
}
|
||||
SrcRef::Imm32(imm) => {
|
||||
self.set_opcode(0x3870);
|
||||
self.set_src_imm_f20(20..39, 56, *imm);
|
||||
assert!(op.srcs[1].src_mod.is_none());
|
||||
}
|
||||
SrcRef::CBuf(_) => {
|
||||
self.set_opcode(0x4c70);
|
||||
self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]);
|
||||
}
|
||||
_ => panic!("Unsupported src type"),
|
||||
}
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]);
|
||||
self.set_rnd_mode(39..41, op.rnd_mode);
|
||||
}
|
||||
|
||||
fn encode_dfma(&mut self, op: &OpDFma) {
|
||||
match &op.srcs[2].src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
match &op.srcs[1].src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5b70);
|
||||
self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
|
||||
}
|
||||
SrcRef::Imm32(imm) => {
|
||||
self.set_opcode(0x3670);
|
||||
self.set_src_imm_f20(20..39, 56, *imm);
|
||||
assert!(op.srcs[1].src_mod.is_none());
|
||||
}
|
||||
SrcRef::CBuf(cb) => {
|
||||
self.set_opcode(0x4b70);
|
||||
self.set_src_cb(20..39, cb);
|
||||
}
|
||||
_ => panic!("Invalid dfma src1: {}", op.srcs[1]),
|
||||
}
|
||||
self.set_reg_src_ref(39..47, op.srcs[2].src_ref);
|
||||
}
|
||||
SrcRef::CBuf(cb) => {
|
||||
self.set_opcode(0x5370);
|
||||
self.set_reg_src_ref(39..47, op.srcs[1].src_ref);
|
||||
self.set_src_cb(20..39, cb);
|
||||
}
|
||||
_ => panic!("Invalid dfma src2: {}", op.srcs[2]),
|
||||
}
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
|
||||
|
||||
assert!(!op.srcs[0].src_mod.has_fabs());
|
||||
assert!(!op.srcs[1].src_mod.has_fabs());
|
||||
assert!(!op.srcs[2].src_mod.has_fabs());
|
||||
self.set_bit(
|
||||
48,
|
||||
op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(),
|
||||
);
|
||||
self.set_bit(49, op.srcs[2].src_mod.has_fneg());
|
||||
|
||||
self.set_rnd_mode(50..52, op.rnd_mode);
|
||||
}
|
||||
|
||||
fn encode_dmul(&mut self, op: &OpDMul) {
|
||||
match &op.srcs[1].src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c80);
|
||||
self.set_reg_src_ref(20..28, op.srcs[1].src_ref);
|
||||
}
|
||||
SrcRef::Imm32(imm) => {
|
||||
self.set_opcode(0x3880);
|
||||
self.set_src_imm_f20(20..39, 56, *imm);
|
||||
assert!(op.srcs[1].src_mod.is_none());
|
||||
}
|
||||
SrcRef::CBuf(cb) => {
|
||||
self.set_opcode(0x4c80);
|
||||
self.set_src_cb(20..39, cb);
|
||||
}
|
||||
_ => panic!("Invalid dmul src1: {}", op.srcs[1]),
|
||||
}
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_reg_src_ref(8..16, op.srcs[0].src_ref);
|
||||
|
||||
self.set_rnd_mode(39..41, op.rnd_mode);
|
||||
|
||||
assert!(!op.srcs[0].src_mod.has_fabs());
|
||||
assert!(!op.srcs[1].src_mod.has_fabs());
|
||||
self.set_bit(
|
||||
48,
|
||||
op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(),
|
||||
);
|
||||
}
|
||||
|
||||
fn encode_dsetp(&mut self, op: &OpDSetP) {
|
||||
match &op.srcs[1].src_ref {
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5b80);
|
||||
self.set_reg_fmod_src(20..28, 44, 6, op.srcs[1]);
|
||||
}
|
||||
SrcRef::Imm32(imm) => {
|
||||
self.set_opcode(0x3680);
|
||||
self.set_src_imm_f20(20..39, 56, *imm);
|
||||
assert!(op.srcs[1].src_mod.is_none());
|
||||
}
|
||||
SrcRef::CBuf(_) => {
|
||||
self.set_opcode(0x4b80);
|
||||
self.set_reg_fmod_src(20..39, 44, 6, op.srcs[1]);
|
||||
}
|
||||
_ => panic!("Invalid dmul src1: {}", op.srcs[1]),
|
||||
}
|
||||
|
||||
self.set_pred_dst(3..6, op.dst);
|
||||
self.set_pred_dst(0..3, Dst::None); // dst1
|
||||
self.set_pred_src(39..42, 42, op.accum);
|
||||
self.set_pred_set_op(45..47, op.set_op);
|
||||
self.set_float_cmp_op(48..52, op.cmp_op);
|
||||
self.set_reg_fmod_src(8..16, 7, 43, op.srcs[0]);
|
||||
}
|
||||
|
||||
fn encode_iabs(&mut self, op: &OpIAbs) {
|
||||
assert!(op.src.is_reg_or_zero());
|
||||
|
||||
|
|
@ -1748,6 +1872,10 @@ impl SM50Instr {
|
|||
Op::FSet(op) => si.encode_fset(&op),
|
||||
Op::FSetP(op) => si.encode_fsetp(&op),
|
||||
Op::MuFu(op) => si.encode_mufu(&op),
|
||||
Op::DAdd(op) => si.encode_dadd(&op),
|
||||
Op::DFma(op) => si.encode_dfma(&op),
|
||||
Op::DMul(op) => si.encode_dmul(&op),
|
||||
Op::DSetP(op) => si.encode_dsetp(&op),
|
||||
Op::IAbs(op) => si.encode_iabs(&op),
|
||||
Op::IAdd2(op) => si.encode_iadd2(&op),
|
||||
Op::Mov(op) => si.encode_mov(&op),
|
||||
|
|
|
|||
|
|
@ -131,6 +131,48 @@ fn copy_alu_src_if_i20_overflow(
|
|||
}
|
||||
}
|
||||
|
||||
fn copy_alu_src_if_f20_overflow(
|
||||
b: &mut impl SSABuilder,
|
||||
src: &mut Src,
|
||||
src_type: SrcType,
|
||||
) {
|
||||
if src.as_imm_not_f20().is_some() {
|
||||
copy_alu_src(b, src, src_type);
|
||||
}
|
||||
}
|
||||
|
||||
fn copy_alu_src_if_fabs(
|
||||
b: &mut impl SSABuilder,
|
||||
src: &mut Src,
|
||||
src_type: SrcType,
|
||||
) {
|
||||
if src.src_mod.has_fabs() {
|
||||
match src_type {
|
||||
SrcType::F32 => {
|
||||
let val = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpFAdd {
|
||||
dst: val.into(),
|
||||
srcs: [Src::new_zero().fneg(), *src],
|
||||
saturate: false,
|
||||
rnd_mode: FRndMode::NearestEven,
|
||||
ftz: false,
|
||||
});
|
||||
*src = val.into();
|
||||
}
|
||||
SrcType::F64 => {
|
||||
let val = b.alloc_ssa(RegFile::GPR, 2);
|
||||
b.push_op(OpDAdd {
|
||||
dst: val.into(),
|
||||
srcs: [Src::new_zero().fneg(), *src],
|
||||
rnd_mode: FRndMode::NearestEven,
|
||||
});
|
||||
*src = val.into();
|
||||
}
|
||||
_ => panic!("Invalid ffabs srouce type"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn legalize_sm50_instr(
|
||||
b: &mut impl SSABuilder,
|
||||
_bl: &impl BlockLiveness,
|
||||
|
|
@ -183,6 +225,38 @@ fn legalize_sm50_instr(
|
|||
Op::MuFu(op) => {
|
||||
copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
|
||||
}
|
||||
Op::DAdd(op) => {
|
||||
let [ref mut src0, ref mut src1] = op.srcs;
|
||||
swap_srcs_if_not_reg(src0, src1);
|
||||
copy_alu_src_if_not_reg(b, src0, SrcType::F64);
|
||||
copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
|
||||
}
|
||||
Op::DFma(op) => {
|
||||
let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
|
||||
copy_alu_src_if_fabs(b, src0, SrcType::F64);
|
||||
copy_alu_src_if_fabs(b, src1, SrcType::F64);
|
||||
copy_alu_src_if_fabs(b, src2, SrcType::F64);
|
||||
swap_srcs_if_not_reg(src0, src1);
|
||||
copy_alu_src_if_not_reg(b, src0, SrcType::F64);
|
||||
copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
|
||||
copy_alu_src_if_not_reg(b, src2, SrcType::F64);
|
||||
}
|
||||
Op::DMul(op) => {
|
||||
let [ref mut src0, ref mut src1] = op.srcs;
|
||||
copy_alu_src_if_fabs(b, src0, SrcType::F64);
|
||||
copy_alu_src_if_fabs(b, src1, SrcType::F64);
|
||||
swap_srcs_if_not_reg(src0, src1);
|
||||
copy_alu_src_if_not_reg(b, src0, SrcType::F64);
|
||||
copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
|
||||
}
|
||||
Op::DSetP(op) => {
|
||||
let [ref mut src0, ref mut src1] = op.srcs;
|
||||
if swap_srcs_if_not_reg(src0, src1) {
|
||||
op.cmp_op = op.cmp_op.flip();
|
||||
}
|
||||
copy_alu_src_if_not_reg(b, src0, SrcType::F64);
|
||||
copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
|
||||
}
|
||||
Op::IAbs(op) => {
|
||||
copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue