From 8401a6084098a1be9899d42aeaf404448f4e6941 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Fri, 18 Apr 2025 18:38:27 -0500
Subject: [PATCH] nak/sm20: Add double ops

Part-of:
---
 src/nouveau/compiler/nak/sm20.rs | 170 ++++++++++++++++++++++++++++++-
 1 file changed, 169 insertions(+), 1 deletion(-)

diff --git a/src/nouveau/compiler/nak/sm20.rs b/src/nouveau/compiler/nak/sm20.rs
index 01773e9df78..fcb09c9d694 100644
--- a/src/nouveau/compiler/nak/sm20.rs
+++ b/src/nouveau/compiler/nak/sm20.rs
@@ -156,7 +156,6 @@ impl AluSrc {
 }
 
 #[repr(u8)]
-#[allow(dead_code)]
 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
 enum SM20Unit {
     Float = 0,
@@ -890,6 +889,170 @@ impl SM20Op for OpFSwz {
     }
 }
 
+// Double-precision add: form A on the Double unit, opcode 0x12.
+impl SM20Op for OpDAdd {
+    fn legalize(&mut self, b: &mut LegalizeBuilder) {
+        use RegFile::GPR;
+        let [src0, src1] = &mut self.srcs;
+        // Addition is commutative, so swapping the operands is harmless.
+        swap_srcs_if_not_reg(src0, src1, GPR);
+        b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
+        b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
+    }
+
+    fn encode(&self, e: &mut SM20Encoder<'_>) {
+        e.encode_form_a(
+            SM20Unit::Double,
+            0x12,
+            Some(&self.dst),
+            Some(&self.srcs[0]),
+            Some(&self.srcs[1]),
+            None,
+        );
+        // Bits 6/7 take the fabs modifier of src1/src0 and bits 8/9 the fneg
+        // modifier of src1/src0.
+        e.set_bit(6, self.srcs[1].src_mod.has_fabs());
+        e.set_bit(7, self.srcs[0].src_mod.has_fabs());
+        e.set_bit(8, self.srcs[1].src_mod.has_fneg());
+        e.set_bit(9, self.srcs[0].src_mod.has_fneg());
+        e.set_rnd_mode(55..57, self.rnd_mode);
+    }
+}
+
+// Double-precision fused multiply-add: form A on the Double unit, opcode 0x8.
+impl SM20Op for OpDFma {
+    fn legalize(&mut self, b: &mut LegalizeBuilder) {
+        use RegFile::GPR;
+        let [src0, src1, src2] = &mut self.srcs;
+        // encode() asserts that no source has fabs, so copy those out first.
+        b.copy_alu_src_if_fabs(src0, GPR, SrcType::F64);
+        b.copy_alu_src_if_fabs(src1, GPR, SrcType::F64);
+        b.copy_alu_src_if_fabs(src2, GPR, SrcType::F64);
+        swap_srcs_if_not_reg(src0, src1, GPR);
+        b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
+        b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
+        b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F64);
+    }
+
+    fn encode(&self, e: &mut SM20Encoder<'_>) {
+        assert!(!self.srcs[0].src_mod.has_fabs());
+        assert!(!self.srcs[1].src_mod.has_fabs());
+        assert!(!self.srcs[2].src_mod.has_fabs());
+
+        e.encode_form_a(
+            SM20Unit::Double,
+            0x8,
+            Some(&self.dst),
+            Some(&self.srcs[0]),
+            Some(&self.srcs[1]),
+            Some(&self.srcs[2]),
+        );
+        e.set_bit(8, self.srcs[2].src_mod.has_fneg());
+        // A single negate bit serves both multiplicands, hence the XOR.
+        let neg0 = self.srcs[0].src_mod.has_fneg();
+        let neg1 = self.srcs[1].src_mod.has_fneg();
+        e.set_bit(9, neg0 ^ neg1);
+        e.set_rnd_mode(55..57, self.rnd_mode);
+    }
+}
+
+// Double-precision min/max: form A on the Double unit, opcode 0x2.
+impl SM20Op for OpDMnMx {
+    fn legalize(&mut self, b: &mut LegalizeBuilder) {
+        use RegFile::GPR;
+        let [src0, src1] = &mut self.srcs;
+        swap_srcs_if_not_reg(src0, src1, GPR);
+        b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
+        b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
+    }
+
+    fn encode(&self, e: &mut SM20Encoder<'_>) {
+        e.encode_form_a(
+            SM20Unit::Double,
+            0x2,
+            Some(&self.dst),
+            Some(&self.srcs[0]),
+            Some(&self.srcs[1]),
+            None,
+        );
+        e.set_bit(6, self.srcs[1].src_mod.has_fabs());
+        e.set_bit(7, self.srcs[0].src_mod.has_fabs());
+        e.set_bit(8, self.srcs[1].src_mod.has_fneg());
+        e.set_bit(9, self.srcs[0].src_mod.has_fneg());
+        e.set_pred_src(49..53, self.min);
+    }
+}
+
+// Double-precision multiply: form A on the Double unit, opcode 0x14.
+impl SM20Op for OpDMul {
+    fn legalize(&mut self, b: &mut LegalizeBuilder) {
+        use RegFile::GPR;
+        let [src0, src1] = &mut self.srcs;
+        // encode() asserts that neither source has fabs, so copy such
+        // sources out here, the same way OpDFma does.
+        b.copy_alu_src_if_fabs(src0, GPR, SrcType::F64);
+        b.copy_alu_src_if_fabs(src1, GPR, SrcType::F64);
+        swap_srcs_if_not_reg(src0, src1, GPR);
+        b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
+        b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
+    }
+
+    fn encode(&self, e: &mut SM20Encoder<'_>) {
+        assert!(!self.srcs[0].src_mod.has_fabs());
+        assert!(!self.srcs[1].src_mod.has_fabs());
+
+        e.encode_form_a(
+            SM20Unit::Double,
+            0x14,
+            Some(&self.dst),
+            Some(&self.srcs[0]),
+            Some(&self.srcs[1]),
+            None,
+        );
+        // A single negate bit serves both multiplicands, hence the XOR.
+        let neg0 = self.srcs[0].src_mod.has_fneg();
+        let neg1 = self.srcs[1].src_mod.has_fneg();
+        e.set_bit(9, neg0 ^ neg1);
+        e.set_rnd_mode(55..57, self.rnd_mode);
+    }
+}
+
+// Double-precision compare that writes a predicate: form A on the Double
+// unit, opcode 0x6.
+impl SM20Op for OpDSetP {
+    fn legalize(&mut self, b: &mut LegalizeBuilder) {
+        use RegFile::GPR;
+        let [src0, src1] = &mut self.srcs;
+        // A comparison is not commutative: when the operands trade places
+        // the comparison must be mirrored as well (a < b becomes b > a).
+        if swap_srcs_if_not_reg(src0, src1, GPR) {
+            self.cmp_op = self.cmp_op.flip();
+        }
+        b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
+        b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
+    }
+
+    fn encode(&self, e: &mut SM20Encoder<'_>) {
+        e.encode_form_a(
+            SM20Unit::Double,
+            0x6,
+            None,
+            Some(&self.srcs[0]),
+            Some(&self.srcs[1]),
+            None,
+        );
+        e.set_bit(6, self.srcs[1].src_mod.has_fabs());
+        e.set_bit(7, self.srcs[0].src_mod.has_fabs());
+        e.set_bit(8, self.srcs[1].src_mod.has_fneg());
+        e.set_bit(9, self.srcs[0].src_mod.has_fneg());
+        e.set_pred_dst(14..17, Dst::None);
+        e.set_pred_dst(17..20, self.dst);
+        e.set_pred_src(49..53, self.accum);
+        e.set_pred_set_op(53..55, self.set_op);
+        e.set_float_cmp_op(55..59, self.cmp_op);
+    }
+}
+
 impl SM20Op for OpBfe {
     fn legalize(&mut self, b: &mut LegalizeBuilder) {
         use RegFile::GPR;
@@ -2491,6 +2654,11 @@ macro_rules! as_sm20_op_match {
             Op::FSet(op) => op,
             Op::FSetP(op) => op,
             Op::FSwz(op) => op,
+            Op::DAdd(op) => op,
+            Op::DFma(op) => op,
+            Op::DMnMx(op) => op,
+            Op::DMul(op) => op,
+            Op::DSetP(op) => op,
             Op::Bfe(op) => op,
             Op::Flo(op) => op,
             Op::IAdd2(op) => op,