diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs index dce2aa6d9e8..bb6e6d8d5aa 100644 --- a/src/nouveau/compiler/nak/hw_tests.rs +++ b/src/nouveau/compiler/nak/hw_tests.rs @@ -2009,3 +2009,82 @@ fn test_op_sgxt() { }); } } + +#[test] +fn test_op_mufu_f16_down() { + let run = &RunSingleton::get(); + if run.sm.sm() < 73 { + return; + } + + for op in [ + MuFuOp::Cos, + MuFuOp::Sin, + MuFuOp::Exp2, + MuFuOp::Log2, + MuFuOp::Rcp, + MuFuOp::Rsq, + MuFuOp::Sqrt, + MuFuOp::Tanh, + ] { + let mut b = TestShaderBuilder::new(&run.sm); + let src: Src = b.ld_test_data(0, MemType::B32).into(); + let up_convert = b.alloc_ssa(RegFile::GPR); + let mufu = b.alloc_ssa(RegFile::GPR); + let dst = b.alloc_ssa(RegFile::GPR); + let dst16 = b.alloc_ssa(RegFile::GPR); + + b.push_op(OpF2F { + dst: up_convert.into(), + src: src.clone(), + src_type: FloatType::F16, + dst_type: FloatType::F32, + ftz: false, + rnd_mode: FRndMode::NearestEven, + integer_rnd: false, + }); + b.push_op(OpMuFu { + dst: mufu.into(), + src: up_convert.into(), + op: op, + op_type: FloatType::F32, + }); + b.push_op(OpMuFu { + dst: dst16.into(), + src: src, + op: op, + op_type: FloatType::F16, + }); + b.push_op(OpF2F { + dst: dst.into(), + src: mufu.into(), + src_type: FloatType::F32, + dst_type: FloatType::F16, + ftz: false, + rnd_mode: FRndMode::Zero, + integer_rnd: false, + }); + + b.st_test_data(4, MemType::B32, dst.into()); + b.st_test_data(8, MemType::B32, dst16.into()); + + let bin = b.compile(); + + let mut data = Vec::new(); + for i in 0..=u16::MAX { + data.push([u32::from(i), 0, 0]); + } + + run.run.run(&bin, &mut data).unwrap(); + for [_, fp32, fp16] in data { + // Around infinity we get a _slightly_ different result for Exp2 and Rcp + if fp16 == 0x7c00 && matches!(op, MuFuOp::Exp2 | MuFuOp::Rcp) { + assert!(fp32 == 0x7c00 || fp32 == 0x7bff); + } else if fp16 == 0xfc00 && matches!(op, MuFuOp::Rcp) { + assert!(fp32 == 0xfc00 || fp32 == 0xfbff); + } else { + assert_eq!(fp32, fp16); + } + } + } +}