nak: use rro when emitting mufu on SM50

Fixes dEQP-VK.glsl.builtin.precision.*, which was previously failing for trig functions, exp, and pow.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27203>
2025-12-25 00:00:11 +01:00 · 2024-01-22 11:25:16 -08:00 · 2024-01-22 11:25:16 -08:00 · c3fbd0dcb1
commit c3fbd0dcb1
parent 4a0f5fff87
2 changed files with 50 additions and 11 deletions
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@ -456,6 +456,53 @@ pub trait SSABuilder: Builder {
        dst
    }

+    fn fsin(&mut self, src: Src) -> SSARef {
+        let tmp = if self.sm() >= 70 {
+            let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
+            self.fmul(src, frac_1_2pi.into())
+        } else {
+            let tmp = self.alloc_ssa(RegFile::GPR, 1);
+            self.push_op(OpRro {
+                dst: tmp.into(),
+                op: RroOp::SinCos,
+                src,
+            });
+            tmp
+        };
+        self.mufu(MuFuOp::Sin, tmp.into())
+    }
+
+    fn fcos(&mut self, src: Src) -> SSARef {
+        let tmp = if self.sm() >= 70 {
+            let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
+            self.fmul(src, frac_1_2pi.into())
+        } else {
+            let tmp = self.alloc_ssa(RegFile::GPR, 1);
+            self.push_op(OpRro {
+                dst: tmp.into(),
+                op: RroOp::SinCos,
+                src,
+            });
+            tmp
+        };
+        self.mufu(MuFuOp::Cos, tmp.into())
+    }
+
+    fn fexp2(&mut self, src: Src) -> SSARef {
+        let tmp = if self.sm() >= 70 {
+            src
+        } else {
+            let tmp = self.alloc_ssa(RegFile::GPR, 1);
+            self.push_op(OpRro {
+                dst: tmp.into(),
+                op: RroOp::Exp2,
+                src,
+            });
+            tmp.into()
+        };
+        self.mufu(MuFuOp::Exp2, tmp)
+    }
+
    fn prmt(&mut self, x: Src, y: Src, sel: [u8; 4]) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.prmt_to(dst.into(), x, y, sel);
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@ -714,11 +714,7 @@ impl<'a> ShaderFromNir<'a> {
                }
                dst
            }
-            nir_op_fcos => {
-                let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
-                let tmp = b.fmul(srcs[0], frac_1_2pi.into());
-                b.mufu(MuFuOp::Cos, tmp.into())
-            }
+            nir_op_fcos => b.fcos(srcs[0]),
            nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => {
                let src_type =
                    FloatType::from_bits(alu.get_src(0).bit_size().into());
@ -756,7 +752,7 @@ impl<'a> ShaderFromNir<'a> {
                }
                dst
            }
-            nir_op_fexp2 => b.mufu(MuFuOp::Exp2, srcs[0]),
+            nir_op_fexp2 => b.fexp2(srcs[0]),
            nir_op_ffma => {
                let ftype = FloatType::from_bits(alu.def.bit_size().into());
                let dst;
@ -938,11 +934,7 @@ impl<'a> ShaderFromNir<'a> {
                    panic!("Unsupported float type: f{}", alu.def.bit_size());
                }
            }
-            nir_op_fsin => {
-                let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
-                let tmp = b.fmul(srcs[0], frac_1_2pi.into());
-                b.mufu(MuFuOp::Sin, tmp.into())
-            }
+            nir_op_fsin => b.fsin(srcs[0]),
            nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs[0]),
            nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => {
                let src_bits = alu.get_src(0).src.bit_size();