ac/llvm,radeonsi: lower nir_fpow for aco and llvm

ACO does not implement fpow, so NIR needs to lower it
first. LLVM would lower it by itself in the same way, so
we now always lower fpow in NIR.

Remove the LLVM fpow implementation that has special
handling for the multiplication. It's not used any
more and does not match the GLSL spec, as fpow(0,0)=NaN
but here we get 0.

There are some pixel changes for gl-radeonsi-stoney:
  ror-default 2 (no tolerance), 0 (1% tol.)

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22573>
This commit is contained in:
Qiang Yu 2023-04-17 18:01:09 +08:00 committed by Marge Bot
parent 19a8626f86
commit 9bc1fb4c07
3 changed files with 2 additions and 25 deletions

View file

@ -104,7 +104,7 @@ traces:
checksum: 60f74020451e9beaf586b4551541b763
ror/ror-default.trace:
gl-radeonsi-stoney:
checksum: ea53f93df31703bf7a07a4efb922608f
checksum: a37b58424c4289a6de77e61d599b6fab
nheko/nheko-colors.trace:
gl-radeonsi-stoney:
checksum: d3234cd6ccc2ab5d3ceab7db79300c69

View file

@ -844,30 +844,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
break;
case nir_op_fpow:
if (instr->dest.dest.ssa.bit_size != 32) {
/* 16 and 64 bits */
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
ac_to_float_type(&ctx->ac, def_type), src[0]);
result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
ac_to_float_type(&ctx->ac, def_type), result);
break;
}
if (LLVM_VERSION_MAJOR >= 12) {
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
ac_to_float_type(&ctx->ac, def_type), src[0]);
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
(LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
2, 0);
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
ac_to_float_type(&ctx->ac, def_type), result);
break;
}
/* Older LLVM doesn't have fmul.legacy. */
result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
src[0], src[1]);
break;
case nir_op_fmax:
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
src[0], src[1]);

View file

@ -1280,6 +1280,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.fuse_ffma32 = use_fma32,
.fuse_ffma64 = true,
.lower_fmod = true,
.lower_fpow = true,
.lower_ineg = true,
.lower_pack_snorm_4x8 = true,
.lower_pack_unorm_4x8 = true,