Merge branch 'nir/fsin_fcos_rework' into 'main'

nir: convert fsin_amd and fcos_amd to common code

See merge request mesa/mesa!40541
2026-04-13 13:20:36 +02:00 · 2026-03-30 21:36:36 +00:00 · 2026-03-30 21:36:36 +00:00 · 422829653d
commit 422829653d
parent 1b6ed1b34e eda8d42f94
20 changed files with 60 additions and 74 deletions
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@ -172,7 +172,6 @@ amd_common_files = files(
  'nir/ac_nir_lower_ngg_mesh.c',
  'nir/ac_nir_lower_ps_early.c',
  'nir/ac_nir_lower_ps_late.c',
-  'nir/ac_nir_lower_sin_cos.c',
  'nir/ac_nir_meta.h',
  'nir/ac_nir_meta_cs_blit.c',
  'nir/ac_nir_meta_cs_clear_copy_buffer.c',
--- a/src/amd/common/nir/ac_nir.h
+++ b/src/amd/common/nir/ac_nir.h
@ -89,8 +89,6 @@ nir_def *
 ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset,
                 unsigned align_mul, enum gl_access_qualifier access);

-bool ac_nir_lower_sin_cos(nir_shader *shader);
-
 typedef struct {
   enum amd_gfx_level gfx_level;
   bool has_ls_vgpr_init_bug;
--- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
+++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
@ -514,8 +514,8 @@ init_context(isel_context* ctx, nir_shader* shader)
               case nir_op_fsqrt:
               case nir_op_fexp2:
               case nir_op_flog2:
-               case nir_op_fsin_amd:
-               case nir_op_fcos_amd:
+               case nir_op_fsin_normalized_2_pi:
+               case nir_op_fcos_normalized_2_pi:
               case nir_op_pack_half_2x16_rtz_split:
               case nir_op_pack_half_2x16_split: {
                  if (ctx->program->gfx_level < GFX11_5 ||
--- a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
@ -2512,10 +2512,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
      }
      break;
   }
-   case nir_op_fsin_amd:
-   case nir_op_fcos_amd: {
+   case nir_op_fsin_normalized_2_pi:
+   case nir_op_fcos_normalized_2_pi: {
      if (instr->def.bit_size == 16 || instr->def.bit_size == 32) {
-         bool is_sin = instr->op == nir_op_fsin_amd;
+         bool is_sin = instr->op == nir_op_fsin_normalized_2_pi;
         aco_opcode opcode, fract;
         RegClass rc;
         if (instr->def.bit_size == 16) {
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@ -712,12 +712,12 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
   case nir_op_ffract:
      result = emit_fp_intrinsic(&ctx->ac, "llvm.amdgcn.fract", def_type, src[0], NULL, NULL);
      break;
-   case nir_op_fsin_amd:
-   case nir_op_fcos_amd:
+   case nir_op_fsin_normalized_2_pi:
+   case nir_op_fcos_normalized_2_pi:
      /* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
      if (ctx->ac.gfx_level < GFX9)
         src[0] = emit_fp_intrinsic(&ctx->ac, "llvm.amdgcn.fract", def_type, src[0], NULL, NULL);
-      result = emit_fp_intrinsic(&ctx->ac, instr->op == nir_op_fsin_amd ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
+      result = emit_fp_intrinsic(&ctx->ac, instr->op == nir_op_fsin_normalized_2_pi ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
                                 def_type, src[0], NULL, NULL);
      break;
   case nir_op_fsqrt:
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@ -626,7 +626,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st

      NIR_PASS(_, nir, nir_lower_doubles, NULL, lower_doubles);

-      NIR_PASS(_, nir, ac_nir_lower_sin_cos);
+      NIR_PASS(_, nir, nir_normalize_sin_cos);
   }

   if (nir->info.uses_printf)
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@ -252,6 +252,7 @@ else
  'nir_move_output_stores_to_end.c',
  'nir_move_vec_src_uses_to_dest.c',
  'nir_normalize_cubemap_coords.c',
+  'nir_normalize_sin_cos.c',
  'nir_opt_access.c',
  'nir_opt_barriers.c',
  'nir_opt_barycentric.c',
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -7082,6 +7082,8 @@ bool nir_unlower_io_to_vars(nir_shader *nir, bool keep_intrinsics);

 bool nir_opt_barycentric(nir_shader *shader, bool lower_sample_to_pos);

+bool nir_normalize_sin_cos(nir_shader *shader);
+
 #include "nir_inline_helpers.h"

 static inline bool
--- a/src/amd/common/nir/ac_nir_lower_sin_cos.c
+++ b/src/amd/common/nir/ac_nir_lower_sin_cos.c
@ -4,13 +4,11 @@
 * SPDX-License-Identifier: MIT
 */

-#include "ac_nir.h"
-#include "ac_nir_helpers.h"
-
+#include "nir.h"
 #include "nir_builder.h"

 static bool
-lower_sin_cos(struct nir_builder *b, nir_alu_instr *sincos, UNUSED void *_)
+normalize_sin_cos(struct nir_builder *b, nir_alu_instr *sincos, UNUSED void *_)
 {
   if (sincos->op != nir_op_fsin && sincos->op != nir_op_fcos)
      return false;
@ -19,14 +17,14 @@ lower_sin_cos(struct nir_builder *b, nir_alu_instr *sincos, UNUSED void *_)
   b->fp_math_ctrl = sincos->fp_math_ctrl;

   nir_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
-   nir_def *replace = sincos->op == nir_op_fsin ? nir_fsin_amd(b, src) : nir_fcos_amd(b, src);
+   nir_def *replace = sincos->op == nir_op_fsin ? nir_fsin_normalized_2_pi(b, src) : nir_fcos_normalized_2_pi(b, src);
   nir_def_replace(&sincos->def, replace);

   return true;
 }

 bool
-ac_nir_lower_sin_cos(nir_shader *shader)
+nir_normalize_sin_cos(nir_shader *shader)
 {
-   return nir_shader_alu_pass(shader, lower_sin_cos, nir_metadata_control_flow, NULL);
+   return nir_shader_alu_pass(shader, normalize_sin_cos, nir_metadata_control_flow, NULL);
 }
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@ -1441,11 +1441,11 @@ unop_horiz("cube_amd", 4, tfloat32, 3, tfloat32, """
   }
 """)

-# r600/gcn specific sin and cos
+# amd/nv specific sin and cos
 # these trigonometric functions need some lowering because the supported
 # input values are expected to be normalized by dividing by (2 * pi)
-unop("fsin_amd", tfloat, "sinf(6.2831853 * src0)")
-unop("fcos_amd", tfloat, "cosf(6.2831853 * src0)")
+unop("fsin_normalized_2_pi", tfloat, "sinf(6.2831853 * src0)")
+unop("fcos_normalized_2_pi", tfloat, "cosf(6.2831853 * src0)")

 opcode("alignbyte_amd", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False, "", """
   uint64_t src = src1 | ((uint64_t)src0 << 32);
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@ -3380,7 +3380,7 @@ for op in ['fpow']:
        (('bcsel', a, (op, b, c), (op + '(is_used_once)', d, c)), (op, ('bcsel', a, b, d), c)),
    ]

-for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_amd', 'fcos_amd', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign', 'fcanonicalize']:
+for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_normalized_2_pi', 'fcos_normalized_2_pi', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign', 'fcanonicalize']:
    optimizations += [
        (('bcsel', c, (op + '(is_used_once)', a), (op + '(is_used_once)', b)), (op, ('bcsel', c, a, b))),
    ]
--- a/src/compiler/nir/nir_opt_fp_math_ctrl.c
+++ b/src/compiler/nir/nir_opt_fp_math_ctrl.c
@ -118,7 +118,7 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state)
      case nir_op_fexp2:
      case nir_op_flog2:
      case nir_op_fcos:
-      case nir_op_fcos_amd:
+      case nir_op_fcos_normalized_2_pi:
      case nir_op_fmulz:
      case nir_op_ffract:
         break;
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@ -5151,8 +5151,8 @@ default_varying_estimate_instr_cost(nir_instr *instr)
      case nir_op_fsqrt:
      case nir_op_fsin:
      case nir_op_fcos:
-      case nir_op_fsin_amd:
-      case nir_op_fcos_amd:
+      case nir_op_fsin_normalized_2_pi:
+      case nir_op_fcos_normalized_2_pi:
         /* FP64 is usually much slower. */
         return dst_bit_size == 64 ? 32 : 4;

--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@ -805,8 +805,8 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
      case nir_op_ffract:
      case nir_op_fsin:
      case nir_op_fcos:
-      case nir_op_fsin_amd:
-      case nir_op_fcos_amd:
+      case nir_op_fsin_normalized_2_pi:
+      case nir_op_fcos_normalized_2_pi:
      case nir_op_f2f16:
      case nir_op_f2f16_rtz:
      case nir_op_f2f16_rtne:
@ -1203,8 +1203,8 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32

   case nir_op_fsin:
   case nir_op_fcos:
-   case nir_op_fsin_amd:
-   case nir_op_fcos_amd: {
+   case nir_op_fsin_normalized_2_pi:
+   case nir_op_fcos_normalized_2_pi: {
      /* [-1, +1], and sin/cos(Inf) is NaN */
      r = FP_CLASS_NEG_ONE | FP_CLASS_LT_ZERO_GT_NEG_ONE | FP_CLASS_ANY_ZERO |
          FP_CLASS_GT_ZERO_LT_POS_ONE | FP_CLASS_POS_ONE | FP_CLASS_NON_INTEGRAL;
--- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
@ -1672,7 +1672,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)

   if (shader.chip_class() == ISA_CC_CAYMAN) {
      switch (alu->op) {
-      case nir_op_fcos_amd:
+      case nir_op_fcos_normalized_2_pi:
         return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
      case nir_op_fexp2:
         return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
@ -1684,7 +1684,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
         return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
      case nir_op_fsqrt:
         return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
-      case nir_op_fsin_amd:
+      case nir_op_fsin_normalized_2_pi:
         return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
      case nir_op_i2f32:
         return emit_alu_op1(*alu, op1_int_to_flt, shader);
@ -1746,7 +1746,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
         return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
      case nir_op_f2u32:
         return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
-      case nir_op_fcos_amd:
+      case nir_op_fcos_normalized_2_pi:
         return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
      case nir_op_fexp2:
         return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
@ -1756,7 +1756,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
         return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
      case nir_op_frsq:
         return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
-      case nir_op_fsin_amd:
+      case nir_op_fsin_normalized_2_pi:
         return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
      case nir_op_fsqrt:
         return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_alu.cpp
@ -102,9 +102,9 @@ LowerSinCos::lower(nir_instr *instr)
         : nir_ffma_imm12(b, fract, 2.0f * M_PI, -M_PI);

   if (alu->op == nir_op_fsin)
-      return nir_fsin_amd(b, normalized);
+      return nir_fsin_normalized_2_pi(b, normalized);
   else
-      return nir_fcos_amd(b, normalized);
+      return nir_fcos_normalized_2_pi(b, normalized);
 }

 class FixKcacheIndirectRead : public NirLowerInstruction {
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@ -657,7 +657,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
   };
   NIR_PASS(progress, nir, nir_lower_image, &lower_image_options);

-   NIR_PASS(progress, nir, ac_nir_lower_sin_cos);
+   NIR_PASS(progress, nir, nir_normalize_sin_cos);
   NIR_PASS(progress, nir, si_nir_lower_intrinsics_early);

   if (nir->info.stage == MESA_SHADER_TASK) {
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@ -289,19 +289,6 @@ pub trait SSABuilder: Builder {
        dst
    }

-    fn fmul(&mut self, x: Src, y: Src) -> SSAValue {
-        let dst = self.alloc_ssa(RegFile::GPR);
-        self.push_op(OpFMul {
-            dst: dst.into(),
-            srcs: [x, y],
-            saturate: false,
-            rnd_mode: FRndMode::NearestEven,
-            ftz: false,
-            dnz: false,
-        });
-        dst
-    }
-
    fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
        let dst = self.alloc_ssa(RegFile::GPR);
        self.push_op(OpFSet {
@ -754,34 +741,24 @@ pub trait SSABuilder: Builder {
    }

    fn fsin(&mut self, src: Src) -> SSAValue {
-        let tmp = if self.sm() >= 70 {
-            let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
-            self.fmul(src, frac_1_2pi.into())
-        } else {
-            let tmp = self.alloc_ssa(RegFile::GPR);
-            self.push_op(OpRro {
-                dst: tmp.into(),
-                op: RroOp::SinCos,
-                src,
-            });
-            tmp
-        };
+        assert!(self.sm() < 70);
+        let tmp = self.alloc_ssa(RegFile::GPR);
+        self.push_op(OpRro {
+            dst: tmp.into(),
+            op: RroOp::SinCos,
+            src,
+        });
        self.mufu(MuFuOp::Sin, tmp.into())
    }

    fn fcos(&mut self, src: Src) -> SSAValue {
-        let tmp = if self.sm() >= 70 {
-            let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
-            self.fmul(src, frac_1_2pi.into())
-        } else {
-            let tmp = self.alloc_ssa(RegFile::GPR);
-            self.push_op(OpRro {
-                dst: tmp.into(),
-                op: RroOp::SinCos,
-                src,
-            });
-            tmp
-        };
+        assert!(self.sm() < 70);
+        let tmp = self.alloc_ssa(RegFile::GPR);
+        self.push_op(OpRro {
+            dst: tmp.into(),
+            op: RroOp::SinCos,
+            src,
+        });
        self.mufu(MuFuOp::Cos, tmp.into())
    }

--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@ -982,6 +982,10 @@ impl<'a> ShaderFromNir<'a> {
                dst.into()
            }
            nir_op_fcos => b.fcos(srcs(0)).into(),
+            nir_op_fcos_normalized_2_pi => {
+                assert!(self.sm.sm() >= 70);
+                b.mufu(MuFuOp::Cos, srcs(0)).into()
+            }
            nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => {
                let src_type =
                    FloatType::from_bits(alu.get_src(0).bit_size().into());
@ -1317,6 +1321,10 @@ impl<'a> ShaderFromNir<'a> {
                }
            }
            nir_op_fsin => b.fsin(srcs(0)).into(),
+            nir_op_fsin_normalized_2_pi => {
+                assert!(self.sm.sm() >= 70);
+                b.mufu(MuFuOp::Sin, srcs(0)).into()
+            }
            nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs(0)).into(),
            nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => {
                let src_bits = alu.get_src(0).src.bit_size();
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@ -1308,6 +1308,9 @@ nak_postprocess_nir(nir_shader *nir,
   OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
   OPT(nir, nir_lower_int64);

+   if (nak->sm >= 70)
+      OPT(nir, nir_normalize_sin_cos);
+
   nak_optimize_nir(nir, nak);

   do {