radv,aco,ac/llvm: use nir_op_f{sin,cos}_amd

This lets NIR optimize the multiplication, particularly sin/cos(a * #b).

fossil-db (Sienna Cichlid):
Totals from 12306 (7.58% of 162293) affected shaders:
MaxWaves: 224814 -> 224834 (+0.01%)
Instrs: 17365273 -> 17338758 (-0.15%); split: -0.16%, +0.00%
CodeSize: 93478488 -> 93354912 (-0.13%); split: -0.14%, +0.01%
VGPRs: 752080 -> 752072 (-0.00%); split: -0.00%, +0.00%
SpillSGPRs: 8440 -> 8410 (-0.36%)
Latency: 200402154 -> 200279405 (-0.06%); split: -0.06%, +0.00%
InvThroughput: 37588077 -> 37545545 (-0.11%); split: -0.11%, +0.00%
VClause: 293863 -> 293874 (+0.00%); split: -0.03%, +0.03%
SClause: 619539 -> 619064 (-0.08%); split: -0.09%, +0.01%
Copies: 1151591 -> 1151641 (+0.00%); split: -0.04%, +0.05%
Branches: 506434 -> 506437 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 877609 -> 877517 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 711938 -> 711940 (+0.00%); split: -0.00%, +0.00%

fossil-db (LLVM, Sienna Cichlid):
Totals from 4377 (3.59% of 121873) affected shaders:
SGPRs: 358960 -> 359176 (+0.06%); split: -0.18%, +0.25%
VGPRs: 319832 -> 319720 (-0.04%); split: -0.18%, +0.15%
SpillSGPRs: 46983 -> 47007 (+0.05%); split: -0.99%, +1.04%
CodeSize: 30872812 -> 30764512 (-0.35%); split: -0.39%, +0.04%
MaxWaves: 73814 -> 73904 (+0.12%); split: +0.25%, -0.13%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10587>
2026-05-04 22:49:13 +02:00 · 2021-05-03 11:10:06 +01:00 · 2021-05-03 11:10:06 +01:00 · 48578713b7
commit 48578713b7
parent bb0415b697
4 changed files with 36 additions and 14 deletions
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@ -2750,27 +2750,22 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
      }
      break;
   }
-   case nir_op_fsin:
-   case nir_op_fcos: {
+   case nir_op_fsin_amd:
+   case nir_op_fcos_amd: {
      Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
      aco_ptr<Instruction> norm;
      if (dst.regClass() == v2b) {
-         Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3118u));
-         Temp tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), half_pi, src);
         aco_opcode opcode =
-            instr->op == nir_op_fsin ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
-         bld.vop1(opcode, Definition(dst), tmp);
+            instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
+         bld.vop1(opcode, Definition(dst), src);
      } else if (dst.regClass() == v1) {
-         Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3e22f983u));
-         Temp tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), half_pi, src);
-
         /* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
         if (ctx->options->gfx_level < GFX9)
-            tmp = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), tmp);
+            src = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), src);

         aco_opcode opcode =
-            instr->op == nir_op_fsin ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
-         bld.vop1(opcode, Definition(dst), tmp);
+            instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
+         bld.vop1(opcode, Definition(dst), src);
      } else {
         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
      }
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@ -501,8 +501,8 @@ init_context(isel_context* ctx, nir_shader* shader)
               case nir_op_fceil:
               case nir_op_ftrunc:
               case nir_op_fround_even:
-               case nir_op_fsin:
-               case nir_op_fcos:
+               case nir_op_fsin_amd:
+               case nir_op_fcos_amd:
               case nir_op_f2f16:
               case nir_op_f2f16_rtz:
               case nir_op_f2f16_rtne:
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@ -854,6 +854,16 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
+   case nir_op_fsin_amd:
+   case nir_op_fcos_amd:
+      /* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
+      if (ctx->ac.gfx_level < GFX9)
+         src[0] = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
+                                              ac_to_float_type(&ctx->ac, def_type), src[0]);
+      result =
+         emit_intrin_1f_param(&ctx->ac, instr->op == nir_op_fsin_amd ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
+                              ac_to_float_type(&ctx->ac, def_type), src[0]);
+      break;
   case nir_op_fsqrt:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@ -639,6 +639,21 @@ radv_lower_ms_workgroup_id(nir_shader *nir)
   return progress;
 }

+/* Instruction filter for the sin/cos lowering: accepts only ALU instructions
+ * whose opcode is nir_op_fsin or nir_op_fcos. The second argument is unused.
+ */
+static bool
+is_sincos(const nir_instr *instr, const void *_)
+{
+   /* Check the type first so the ALU cast is only applied to ALU instructions. */
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   const nir_alu_instr *alu = nir_instr_as_alu(instr);
+   return alu->op == nir_op_fsin || alu->op == nir_op_fcos;
+}
+
+/* Lowering callback for fsin/fcos: builds the replacement value for `instr`.
+ *
+ * Source 0 of the ALU instruction is multiplied by 0.15915493667125702
+ * (approximately 1 / (2 * pi)) and the product is fed to nir_fsin_amd or
+ * nir_fcos_amd, depending on the original opcode. Emitting the multiply as a
+ * separate NIR instruction exposes it to NIR's optimizations.
+ *
+ * The instruction is cast to an ALU instruction without a type check here, so
+ * this presumably relies on the paired filter having accepted it already.
+ * The third argument is unused.
+ */
+static nir_ssa_def *
+lower_sincos(struct nir_builder *b, nir_instr *instr, void *_)
+{
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   nir_ssa_def *angle = nir_ssa_for_alu_src(b, alu, 0);
+   nir_ssa_def *scaled = nir_fmul_imm(b, angle, 0.15915493667125702);
+
+   if (alu->op == nir_op_fsin)
+      return nir_fsin_amd(b, scaled);
+
+   return nir_fcos_amd(b, scaled);
+}
+
 nir_shader *
 radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_stage *stage,
                         const struct radv_pipeline_key *key)
@ -849,6 +864,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
      }

      NIR_PASS(_, nir, nir_lower_doubles, NULL, lower_doubles);
+
+      NIR_PASS(_, nir, nir_shader_lower_instructions, &is_sincos, &lower_sincos, NULL);
   }

   NIR_PASS(_, nir, nir_lower_system_values);