spirv: Use a simpler and more correct implementaiton of tanh()

The new implementation is more correct because it clamps the incoming value
to 10 to avoid floating-point overflow.  It also uses a much reduced
version of the formula which only requires 1 exp() rather than 2.  This
fixes all of the dEQP-VK.glsl.builtin.precision.tanh.* tests.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "13.0" <mesa-dev@lists.freedesktop.org>
This commit is contained in:
Jason Ekstrand 2016-12-09 09:34:50 -08:00
parent 9807f502eb
commit da1c49171d

View file

@ -565,16 +565,21 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
build_exp(nb, nir_fneg(nb, src[0]))));
return;
case GLSLstd450Tanh:
/* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
val->ssa->def =
nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
nir_fsub(nb, build_exp(nb, src[0]),
build_exp(nb, nir_fneg(nb, src[0])))),
nir_fmul(nb, nir_imm_float(nb, 0.5f),
nir_fadd(nb, build_exp(nb, src[0]),
build_exp(nb, nir_fneg(nb, src[0])))));
case GLSLstd450Tanh: {
/* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
*
* With a little algebra this reduces to (e^2x - 1) / (e^2x + 1)
*
* We clamp x to (-inf, +10] to avoid precision problems. When x > 10,
* e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the
* computation e^2x +/- 1 so it can be ignored.
*/
nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));
val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),
nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));
return;
}
case GLSLstd450Asinh:
val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),