mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-27 05:48:12 +02:00
gallivm: fix somewhat broken NaN behavior for exp2
I actually screwed that up in 754319490f,
mistakenly thinking the code actually wanted the non-nan result before.
So, introduce that missing nan behavior case and use that instead.
For sse, there's no actual change in the resulting code at all; the fallback
code wouldn't have done the right thing, though.
Of course, the actual issue I saw with pow() was completely unrelated...
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
parent
3d29e75a5f
commit
ca4f0baca2
2 changed files with 25 additions and 13 deletions
|
|
@ -134,7 +134,8 @@ lp_build_min_simple(struct lp_build_context *bld,
|
|||
}
|
||||
}
|
||||
else if (type.floating && util_cpu_caps.has_altivec) {
|
||||
if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
|
||||
if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
|
||||
nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
|
||||
debug_printf("%s: altivec doesn't support nan return nan behavior\n",
|
||||
__FUNCTION__);
|
||||
}
|
||||
|
|
@ -202,7 +203,8 @@ lp_build_min_simple(struct lp_build_context *bld,
|
|||
*/
|
||||
if (util_cpu_caps.has_sse && type.floating &&
|
||||
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
|
||||
nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
|
||||
nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
|
||||
nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
|
||||
LLVMValueRef isnan, max;
|
||||
max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
|
||||
type,
|
||||
|
|
@ -241,6 +243,9 @@ lp_build_min_simple(struct lp_build_context *bld,
|
|||
case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
|
||||
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
|
||||
return lp_build_select(bld, cond, a, b);
|
||||
case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN:
|
||||
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, b, a);
|
||||
return lp_build_select(bld, cond, b, a);
|
||||
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
|
||||
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
|
||||
return lp_build_select(bld, cond, a, b);
|
||||
|
|
@ -310,7 +315,8 @@ lp_build_max_simple(struct lp_build_context *bld,
|
|||
}
|
||||
}
|
||||
else if (type.floating && util_cpu_caps.has_altivec) {
|
||||
if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
|
||||
if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
|
||||
nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
|
||||
debug_printf("%s: altivec doesn't support nan return nan behavior\n",
|
||||
__FUNCTION__);
|
||||
}
|
||||
|
|
@ -373,7 +379,8 @@ lp_build_max_simple(struct lp_build_context *bld,
|
|||
if(intrinsic) {
|
||||
if (util_cpu_caps.has_sse && type.floating &&
|
||||
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
|
||||
nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
|
||||
nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
|
||||
nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
|
||||
LLVMValueRef isnan, min;
|
||||
min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
|
||||
type,
|
||||
|
|
@ -412,6 +419,9 @@ lp_build_max_simple(struct lp_build_context *bld,
|
|||
case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
|
||||
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
|
||||
return lp_build_select(bld, cond, a, b);
|
||||
case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN:
|
||||
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, b, a);
|
||||
return lp_build_select(bld, cond, b, a);
|
||||
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
|
||||
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
|
||||
return lp_build_select(bld, cond, a, b);
|
||||
|
|
@ -3040,7 +3050,6 @@ lp_build_exp2(struct lp_build_context *bld,
|
|||
|
||||
assert(lp_check_value(bld->type, x));
|
||||
|
||||
|
||||
/* TODO: optimize the constant case */
|
||||
if (gallivm_debug & GALLIVM_DEBUG_PERF &&
|
||||
LLVMIsConstant(x)) {
|
||||
|
|
@ -3053,15 +3062,14 @@ lp_build_exp2(struct lp_build_context *bld,
|
|||
/* We want to preserve NaN and make sure that for exp2 if x > 128,
|
||||
* the result is INF and if it's smaller than -126.9 the result is 0 */
|
||||
x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x,
|
||||
GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
|
||||
x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x);
|
||||
GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN);
|
||||
x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999),
|
||||
x, GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN);
|
||||
|
||||
/* ipart = floor(x) */
|
||||
/* fpart = x - ipart */
|
||||
lp_build_ifloor_fract(bld, x, &ipart, &fpart);
|
||||
|
||||
|
||||
|
||||
/* expipart = (float) (1 << ipart) */
|
||||
expipart = LLVMBuildAdd(builder, ipart,
|
||||
lp_build_const_int_vec(bld->gallivm, type, 127), "");
|
||||
|
|
@ -3069,13 +3077,11 @@ lp_build_exp2(struct lp_build_context *bld,
|
|||
lp_build_const_int_vec(bld->gallivm, type, 23), "");
|
||||
expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
|
||||
|
||||
|
||||
expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
|
||||
Elements(lp_build_exp2_polynomial));
|
||||
|
||||
res = LLVMBuildFMul(builder, expipart, expfpart, "");
|
||||
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ lp_build_lerp_3d(struct lp_build_context *bld,
|
|||
enum gallivm_nan_behavior {
|
||||
/* Results are undefined with NaN. Results in fastest code */
|
||||
GALLIVM_NAN_BEHAVIOR_UNDEFINED,
|
||||
/* If input is NaN, NaN is returned */
|
||||
/* If one of the inputs is NaN, NaN is returned */
|
||||
GALLIVM_NAN_RETURN_NAN,
|
||||
/* If one of the inputs is NaN, the other operand is returned */
|
||||
GALLIVM_NAN_RETURN_OTHER,
|
||||
|
|
@ -146,7 +146,13 @@ enum gallivm_nan_behavior {
|
|||
* but we guarantee the second operand is not a NaN.
|
||||
* In min/max it will be as fast as undefined with sse opcodes,
|
||||
* and archs having native return_other can benefit too. */
|
||||
GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
|
||||
GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN,
|
||||
/* If one of the inputs is NaN, NaN is returned,
|
||||
* but we guarantee the first operand is not a NaN.
|
||||
* In min/max it will be as fast as undefined with sse opcodes,
|
||||
* and archs having native return_nan can benefit too. */
|
||||
GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN,
|
||||
|
||||
};
|
||||
|
||||
LLVMValueRef
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue