mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 22:20:14 +01:00
ac/llvm,radeonsi: lower nir_fpow for aco and llvm
aco does not implement fpow, so nir needs to lower it first. llvm would lower it by itself in the same way, so we now always lower fpow in nir. Remove the llvm fpow implementation that has special handling for the multiplication. It's not used any more and does not match the GLSL spec, as fpow(0,0)=NaN but here we get 0. There are some pixel changes for gl-radeonsi-stoney: ror-default 2 (no tolerance), 0 (1% tol.) Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22573>
This commit is contained in:
parent
19a8626f86
commit
9bc1fb4c07
3 changed files with 2 additions and 25 deletions
|
|
@ -104,7 +104,7 @@ traces:
|
|||
checksum: 60f74020451e9beaf586b4551541b763
|
||||
ror/ror-default.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: ea53f93df31703bf7a07a4efb922608f
|
||||
checksum: a37b58424c4289a6de77e61d599b6fab
|
||||
nheko/nheko-colors.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: d3234cd6ccc2ab5d3ceab7db79300c69
|
||||
|
|
|
|||
|
|
@ -844,30 +844,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
|
||||
break;
|
||||
case nir_op_fpow:
|
||||
if (instr->dest.dest.ssa.bit_size != 32) {
|
||||
/* 16 and 64 bits */
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
|
||||
ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
|
||||
ac_to_float_type(&ctx->ac, def_type), result);
|
||||
break;
|
||||
}
|
||||
if (LLVM_VERSION_MAJOR >= 12) {
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
|
||||
ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
|
||||
(LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
|
||||
2, 0);
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
|
||||
ac_to_float_type(&ctx->ac, def_type), result);
|
||||
break;
|
||||
}
|
||||
/* Older LLVM doesn't have fmul.legacy. */
|
||||
result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1]);
|
||||
break;
|
||||
case nir_op_fmax:
|
||||
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1]);
|
||||
|
|
|
|||
|
|
@ -1280,6 +1280,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
.fuse_ffma32 = use_fma32,
|
||||
.fuse_ffma64 = true,
|
||||
.lower_fmod = true,
|
||||
.lower_fpow = true,
|
||||
.lower_ineg = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue