ac/llvm,radeonsi: lower nir_fpow for aco and llvm

ACO does not implement fpow, so NIR needs to lower it first. LLVM would do the lowering itself in the same way, so we now always lower fpow in NIR. Remove the LLVM fpow implementation that has special handling for the multiplication. It is not used any more and does not match the GLSL spec, as fpow(0,0)=NaN but here we get 0. There are some pixel changes for gl-radeonsi-stoney: ror-default 2 (no tolerance), 0 (1% tol.) Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22573>
2026-02-28 00:10:30 +01:00 · 2023-04-17 18:01:09 +08:00 · 2023-04-17 18:01:09 +08:00 · 9bc1fb4c07
commit 9bc1fb4c07
parent 19a8626f86
3 changed files with 2 additions and 25 deletions
--- a/src/amd/ci/traces-amd.yml
+++ b/src/amd/ci/traces-amd.yml
@@ -104,7 +104,7 @@ traces:
      checksum: 60f74020451e9beaf586b4551541b763
  ror/ror-default.trace:
    gl-radeonsi-stoney:
-      checksum: ea53f93df31703bf7a07a4efb922608f
+      checksum: a37b58424c4289a6de77e61d599b6fab
  nheko/nheko-colors.trace:
    gl-radeonsi-stoney:
      checksum: d3234cd6ccc2ab5d3ceab7db79300c69
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -844,30 +844,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
      break;
-   case nir_op_fpow:
-      if (instr->dest.dest.ssa.bit_size != 32) {
-         /* 16 and 64 bits */
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
-                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
-         result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
-                                       ac_to_float_type(&ctx->ac, def_type), result);
-         break;
-      }
-      if (LLVM_VERSION_MAJOR >= 12) {
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
-                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
-         result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
-                                     (LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
-                                     2, 0);
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
-                                       ac_to_float_type(&ctx->ac, def_type), result);
-         break;
-      }
-      /* Older LLVM doesn't have fmul.legacy. */
-      result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
-                                    src[0], src[1]);
-      break;
   case nir_op_fmax:
      result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
                                    src[0], src[1]);
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1280,6 +1280,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
      .fuse_ffma32 = use_fma32,
      .fuse_ffma64 = true,
      .lower_fmod = true,
+      .lower_fpow = true,
      .lower_ineg = true,
      .lower_pack_snorm_4x8 = true,
      .lower_pack_unorm_4x8 = true,