diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c index 2fe45e0eadf..b0c9b7acf05 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c @@ -84,8 +84,6 @@ lp_build_float_to_smallfloat(struct gallivm_state *gallivm, struct lp_build_context f32_bld, i32_bld; LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f); unsigned exponent_start = mantissa_start + mantissa_bits; - bool always_preserve_nans = true; - bool maybe_correct_denorm_rounding = true; lp_build_context_init(&f32_bld, gallivm, f32_type); lp_build_context_init(&i32_bld, gallivm, i32_type); @@ -112,17 +110,12 @@ lp_build_float_to_smallfloat(struct gallivm_state *gallivm, * but only if we use the preserve NaN path does using * src_abs instead save us any instruction. */ - if (maybe_correct_denorm_rounding || !always_preserve_nans) { - i32_roundmask = lp_build_const_int_vec(gallivm, i32_type, - ~((1 << (23 - mantissa_bits)) - 1) & - 0x7fffffff); - rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); - rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask); - rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, ""); - } - else { - rescale_src = lp_build_abs(&f32_bld, src); - } + i32_roundmask = lp_build_const_int_vec(gallivm, i32_type, + ~((1 << (23 - mantissa_bits)) - 1) & + 0x7fffffff); + rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); + rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask); + rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, ""); /* bias exponent (and denormalize if necessary) */ magic = lp_build_const_int_vec(gallivm, i32_type, @@ -147,51 +140,30 @@ lp_build_float_to_smallfloat(struct gallivm_state *gallivm, * (Cannot actually save the comparison since we need to distinguish * Inf and NaN cases anyway, but it would be better for AVX.) */ - if (always_preserve_nans) { - LLVMValueRef infcheck_src, is_inf, is_nan; - LLVMValueRef src_abs = lp_build_abs(&f32_bld, src); - src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, ""); + LLVMValueRef infcheck_src, is_inf, is_nan; + LLVMValueRef src_abs = lp_build_abs(&f32_bld, src); + src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, ""); - if (has_sign) { - infcheck_src = src_abs; - } - else { - infcheck_src = i32_src; - } - is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, - src_abs, i32_floatexpmask); - is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL, - infcheck_src, i32_floatexpmask); - is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf); - /* could also set more mantissa bits but need at least the highest mantissa bit */ - i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22); - /* combine maxexp with qnanbit */ - nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask, - lp_build_and(&i32_bld, is_nan, i32_qnanbit)); + if (has_sign) { + infcheck_src = src_abs; } else { - /* - * A couple simplifications, with mostly 2 drawbacks (so disabled): - * - it will promote some SNaNs (those which only had bits set - * in the mantissa part which got chopped off) to +-Infinity. - * (Those bits get chopped off anyway later so can as well use - * rescale_src instead of src_abs here saving the calculation of that.) - * - for no sign case, it relies on the max() being used for rescale_src - * to give back the NaN (which is NOT ieee754r behavior, but should work - * with sse2 on a full moon (rather if I got the operand order right) - - * we _don't_ have well-defined behavior specified with min/max wrt NaNs, - * however, and if it gets converted to cmp/select it may not work (we - * don't really have specified behavior for cmp wrt NaNs neither). - */ - rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); - is_nan_or_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GEQUAL, - rescale_src, i32_floatexpmask); - /* note this will introduce excess exponent bits */ - nan_or_inf = rescale_src; + infcheck_src = i32_src; } + is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, + src_abs, i32_floatexpmask); + is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL, + infcheck_src, i32_floatexpmask); + is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf); + /* could also set more mantissa bits but need at least the highest mantissa bit */ + i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22); + /* combine maxexp with qnanbit */ + nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask, + lp_build_and(&i32_bld, is_nan, i32_qnanbit)); + res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal); - if (mantissa_start > 0 || !always_preserve_nans) { + if (mantissa_start > 0) { /* mask off excess bits */ unsigned maskbits = (1 << (mantissa_bits + exponent_bits)) - 1; mask = lp_build_const_int_vec(gallivm, i32_type,