From 394bd770bccababa84f3f96e04726cf80a2f2a45 Mon Sep 17 00:00:00 2001 From: Daniel Almeida Date: Fri, 2 Jun 2023 19:38:58 -0300 Subject: [PATCH] nak: add support for floor, ceil and trunc NAK does not support the NIR fceil, ffloor, ftrunc and fround_even ops, and this shows when running the CTS. Add an FRnd op, along with its SM75 encoding, to implement them. Signed-off-by: Daniel Almeida Part-of: --- src/nouveau/compiler/nak_encode_sm75.rs | 21 +++++++++++ src/nouveau/compiler/nak_from_nir.rs | 20 +++++++++++ src/nouveau/compiler/nak_ir.rs | 47 ++++++++++++++++++++++++- src/nouveau/compiler/nak_legalize.rs | 4 ++- 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs index afdd9d4d24c..055d59fbbd5 100644 --- a/src/nouveau/compiler/nak_encode_sm75.rs +++ b/src/nouveau/compiler/nak_encode_sm75.rs @@ -756,6 +756,26 @@ impl SM75Instr { self.set_field(84..86, (op.src_type.bits() / 8).ilog2()); } + fn encode_frnd(&mut self, op: &OpFRnd) { + let opcode = match (op.src_type, op.dst_type) { + (FloatType::F64, FloatType::F64) => 0x113, + _ => 0x107, + }; + + self.encode_alu( + opcode, + Some(op.dst), + ALUSrc::None, + ALUSrc::from_src(&op.src.into()), + ALUSrc::None, + ); + + self.set_field(84..86, (op.src_type.bits() / 8).ilog2()); + self.set_bit(80, false); // TODO: FMZ + self.set_rnd_mode(78..80, op.rnd_mode); + self.set_field(75..77, (op.dst_type.bits() / 8).ilog2()); + } + fn encode_mov(&mut self, op: &OpMov) { self.encode_alu( 0x002, @@ -1493,6 +1513,7 @@ impl SM75Instr { Op::F2F(op) => si.encode_f2f(&op), Op::F2I(op) => si.encode_f2i(&op), Op::I2F(op) => si.encode_i2f(&op), + Op::FRnd(op) => si.encode_frnd(&op), Op::Mov(op) => si.encode_mov(&op), Op::Sel(op) => si.encode_sel(&op), Op::PLop3(op) => si.encode_plop3(&op), diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index e9297a8495c..6caffc64d7b 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -243,6 +243,26 @@ 
impl<'a> ShaderFromNir<'a> { }); dst } + nir_op_fceil | nir_op_ffloor | nir_op_fround_even + | nir_op_ftrunc => { + let dst = b.alloc_ssa(RegFile::GPR, 1); + let ty = FloatType::from_bits(alu.def.bit_size().into()); + let rnd_mode = match alu.op { + nir_op_fceil => FRndMode::PosInf, + nir_op_ffloor => FRndMode::NegInf, + nir_op_ftrunc => FRndMode::Zero, + nir_op_fround_even => FRndMode::NearestEven, + _ => unreachable!(), + }; + b.push_op(OpFRnd { + dst: dst.into(), + src: srcs[0], + src_type: ty, + dst_type: ty, + rnd_mode, + }); + dst + } nir_op_fcos => { let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI); let tmp = diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 1e723b88ca5..81e8cc75b4c 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -1105,6 +1105,7 @@ impl fmt::Display for LogicOp { } } +#[derive(Clone, Copy, Eq, Hash, PartialEq)] pub enum FloatType { F16, F32, @@ -2106,6 +2107,47 @@ impl fmt::Display for OpI2F { } } +#[repr(C)] +#[derive(DstsAsSlice)] +pub struct OpFRnd { + pub dst: Dst, + + pub src: Src, + + pub dst_type: FloatType, + pub src_type: FloatType, + pub rnd_mode: FRndMode, +} + +impl SrcsAsSlice for OpFRnd { + fn srcs_as_slice(&self) -> &[Src] { + std::slice::from_ref(&self.src) + } + + fn srcs_as_mut_slice(&mut self) -> &mut [Src] { + std::slice::from_mut(&mut self.src) + } + + fn src_types(&self) -> SrcTypeList { + let src_type = match self.src_type { + FloatType::F16 => unimplemented!(), + FloatType::F32 => SrcType::F32, + FloatType::F64 => SrcType::F64, + }; + SrcTypeList::Uniform(src_type) + } +} + +impl fmt::Display for OpFRnd { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "FRND.{}.{}.{} {} {}", + self.dst_type, self.src_type, self.rnd_mode, self.dst, self.src, + ) + } +} + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpMov { @@ -3060,6 +3102,7 @@ pub enum Op { F2F(OpF2F), F2I(OpF2I), I2F(OpI2F), + FRnd(OpFRnd), Mov(OpMov), 
Sel(OpSel), PLop3(OpPLop3), @@ -3374,7 +3417,9 @@ impl Instr { | Op::PLop3(_) | Op::ISetP(_) | Op::Shf(_) => Some(6), - Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::Mov(_) => Some(15), + Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::Mov(_) | Op::FRnd(_) => { + Some(15) + } Op::Sel(_) => Some(15), Op::S2R(_) => None, Op::ALd(_) => None, diff --git a/src/nouveau/compiler/nak_legalize.rs b/src/nouveau/compiler/nak_legalize.rs index c5a1547363c..3aa5d8278a5 100644 --- a/src/nouveau/compiler/nak_legalize.rs +++ b/src/nouveau/compiler/nak_legalize.rs @@ -182,7 +182,9 @@ impl<'a> LegalizeInstr<'a> { self.mov_src_if_not_reg(&mut op.low, RegFile::GPR); self.mov_src_if_not_reg(&mut op.high, RegFile::GPR); } - Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::Mov(_) => (), + Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::Mov(_) | Op::FRnd(_) => { + () + } Op::Sel(op) => { let [ref mut src0, ref mut src1] = op.srcs; if !src_is_reg(src0) && src_is_reg(src1) {