diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs index 14ca3b30c7c..d6075c03c75 100644 --- a/src/nouveau/compiler/nak/hw_tests.rs +++ b/src/nouveau/compiler/nak/hw_tests.rs @@ -332,7 +332,11 @@ pub fn test_foldable_op_with( fold_src.push(FoldData::U32(0)); } SrcType::F64 => { - todo!("Double ops aren't tested yet"); + /* NOTE(review): the offset passed here is comps * 4 while the load type is MemType::B64, and comps then advances by 2; presumably the offset has to be 8-byte aligned, which would require comps to be even when an F64 source is reached - confirm. */ let data = b.ld_test_data(comps * 4, MemType::B64); + comps += 2; + + src.src_ref = data.into(); + fold_src.push(FoldData::Vec2([0, 0])); } SrcType::Pred => { let data = b.ld_test_data(comps * 4, MemType::B32); @@ -689,6 +693,20 @@ fn test_op_iadd3x() { } } +#[test] +fn test_op_imnmx() { + /* Builds an OpIMnMx for each of IntCmpType::U32 and IntCmpType::I32 and hands it to test_foldable_op. The srcs and min fields are filled with constants here; presumably test_foldable_op substitutes its own test data for them - confirm against test_foldable_op_with. */ for cmp_type in [IntCmpType::U32, IntCmpType::I32] { + let op = OpIMnMx { + dst: Dst::None, + srcs: [0.into(), 0.into()], + min: false.into(), + cmp_type, + }; + + test_foldable_op(op); + } +} + #[test] fn test_op_isetp() { let set_ops = [PredSetOp::And, PredSetOp::Or, PredSetOp::Xor]; @@ -1180,6 +1198,41 @@ fn test_iadd64() { } } +#[test] +fn test_op_dsetp() { + /* Runs test_foldable_op on an OpDSetP for every combination of the three PredSetOp values and the fourteen FloatCmpOp values listed below. */ let set_ops = [PredSetOp::And, PredSetOp::Or, PredSetOp::Xor]; + let cmp_ops = [ + FloatCmpOp::OrdEq, + FloatCmpOp::OrdNe, + FloatCmpOp::OrdLt, + FloatCmpOp::OrdLe, + FloatCmpOp::OrdGt, + FloatCmpOp::OrdGe, + FloatCmpOp::UnordEq, + FloatCmpOp::UnordNe, + FloatCmpOp::UnordLt, + FloatCmpOp::UnordLe, + FloatCmpOp::UnordGt, + FloatCmpOp::UnordGe, + FloatCmpOp::IsNum, + FloatCmpOp::IsNan, + ]; + + for set_op in set_ops { + for cmp_op in cmp_ops { + let op = OpDSetP { + dst: Dst::None, + set_op, + cmp_op, + srcs: [0.into(), 0.into()], + accum: true.into(), + }; + + test_foldable_op(op); + } + } +} + #[test] fn test_op_suclamp() { if !RunSingleton::get().sm.is_kepler() { diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index c77c06b98e7..04db3dd4834 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -3109,7 +3109,7 @@ impl DisplayOp for OpDMnMx { impl_display_for_op!(OpDMnMx); #[repr(C)] -#[derive(SrcsAsSlice, 
DstsAsSlice)] +#[derive(Clone, SrcsAsSlice, DstsAsSlice)] pub struct OpDSetP { #[dst_type(Pred)] pub dst: Dst, @@ -3124,6 +3124,35 @@ pub struct OpDSetP { pub accum: Src, } +impl Foldable for OpDSetP { + /* Folds an OpDSetP: reads both f64 sources and the accum predicate from the fold data, evaluates cmp_op on the two values, combines that result with accum through set_op.eval, and stores the outcome in the predicate destination. The _sm argument is not used. */ fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) { + let a = f.get_f64_src(self, &self.srcs[0]); + let b = f.get_f64_src(self, &self.srcs[1]); + let accum = f.get_pred_src(self, &self.accum); + + /* ordered is true only when neither operand is NaN. Each Ord* arm requires ordered in addition to its comparison; each Unord* arm is also true whenever ordered is false; IsNum and IsNan report ordered and its negation directly. */ let ordered = !a.is_nan() && !b.is_nan(); + let cmp_res = match self.cmp_op { + FloatCmpOp::OrdEq => ordered && a == b, + FloatCmpOp::OrdNe => ordered && a != b, + FloatCmpOp::OrdLt => ordered && a < b, + FloatCmpOp::OrdLe => ordered && a <= b, + FloatCmpOp::OrdGt => ordered && a > b, + FloatCmpOp::OrdGe => ordered && a >= b, + FloatCmpOp::UnordEq => !ordered || a == b, + FloatCmpOp::UnordNe => !ordered || a != b, + FloatCmpOp::UnordLt => !ordered || a < b, + FloatCmpOp::UnordLe => !ordered || a <= b, + FloatCmpOp::UnordGt => !ordered || a > b, + FloatCmpOp::UnordGe => !ordered || a >= b, + FloatCmpOp::IsNum => ordered, + FloatCmpOp::IsNan => !ordered, + }; + let res = self.set_op.eval(cmp_res, accum); + + f.set_pred_dst(self, &self.dst, res); + } +} + impl DisplayOp for OpDSetP { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "dsetp{}", self.cmp_op)?; @@ -3766,7 +3795,7 @@ impl DisplayOp for OpIMad64 { impl_display_for_op!(OpIMad64); #[repr(C)] -#[derive(SrcsAsSlice, DstsAsSlice)] +#[derive(Clone, SrcsAsSlice, DstsAsSlice)] pub struct OpIMnMx { #[dst_type(GPR)] pub dst: Dst, @@ -3780,6 +3809,25 @@ pub struct OpIMnMx { pub min: Src, } +impl Foldable for OpIMnMx { + /* Folds an OpIMnMx: reads both sources through get_u32_bnot_src and the min predicate through get_pred_src, selects the minimum when min is true and the maximum otherwise, and stores the value with set_u32_dst. The _sm argument is not used. */ fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) { + let (a, b) = ( + f.get_u32_bnot_src(self, &self.srcs[0]), + f.get_u32_bnot_src(self, &self.srcs[1]), + ); + let min = f.get_pred_src(self, &self.min); + + /* The U32 arms compare the operands as fetched; the I32 arms cast both operands to i32 for the comparison and cast the chosen value back to u32. */ let res = match (min, self.cmp_type) { + (true, IntCmpType::U32) => a.min(b), + (true, IntCmpType::I32) => (a as i32).min(b as i32) as u32, + (false, 
IntCmpType::U32) => a.max(b), + (false, IntCmpType::I32) => (a as i32).max(b as i32) as u32, + }; + + f.set_u32_dst(self, &self.dst, res); + } +} + impl DisplayOp for OpIMnMx { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(