diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c index b0c9b7acf05..7ee1e959b6b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c @@ -113,23 +113,56 @@ lp_build_float_to_smallfloat(struct gallivm_state *gallivm, i32_roundmask = lp_build_const_int_vec(gallivm, i32_type, ~((1 << (23 - mantissa_bits)) - 1) & 0x7fffffff); - rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask); - rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, ""); - /* bias exponent (and denormalize if necessary) */ - magic = lp_build_const_int_vec(gallivm, i32_type, - ((1 << (exponent_bits - 1)) - 1) << 23); - magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""); - normal = lp_build_mul(&f32_bld, rescale_src, magic); - - /* clamp to max value - largest non-infinity number */ + /* largest non-infinity number */ small_max = lp_build_const_int_vec(gallivm, i32_type, - (((1 << exponent_bits) - 2) << 23) | + ((127 + ((1 << (exponent_bits - 1)) - 1)) << 23) | (((1 << mantissa_bits) - 1) << (23 - mantissa_bits))); - small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, ""); - normal = lp_build_min(&f32_bld, normal, small_max); - normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, ""); + + /* + * When the smallfloat result is a denormal, this code only works correctly + * if denormals are enabled; otherwise the result is flushed to zero.
+ */ + if (0) { + /* clamp to max value */ + rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, ""); + small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, ""); + normal = lp_build_min(&f32_bld, rescale_src, small_max); + /* bias exponent (and denormalize if necessary) */ + magic = lp_build_const_int_vec(gallivm, i32_type, + ((1 << (exponent_bits - 1)) - 1) << 23); + magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""); + normal = lp_build_mul(&f32_bld, normal, magic); + normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, ""); + } + else { + LLVMValueRef exp_adj, denorm_scale, is_denorm_or_zero, denorm; + /* clamp to max value */ + normal = lp_build_min(&i32_bld, rescale_src, small_max); + exp_adj = lp_build_const_int_vec(gallivm, i32_type, + (127 - ((1 << (exponent_bits - 1)) - 1)) << 23); + normal = lp_build_sub(&i32_bld, normal, exp_adj); + LLVMValueRef mantissa_mask = lp_build_not(&i32_bld, + lp_build_const_int_vec(gallivm, i32_type, 0xFF800000)); + is_denorm_or_zero = lp_build_cmp(&i32_bld, PIPE_FUNC_LEQUAL, normal, mantissa_mask); + /* + * for smallfloat denormals, do magic scaling so the mantissa bits can + * be extracted directly. denorm_scale is just the smallest normal number. + */ + denorm_scale = lp_build_const_int_vec(gallivm, i32_type, + (127 - ((1 << (exponent_bits - 1)) - 2)) << 23); + denorm_scale = LLVMBuildBitCast(builder, denorm_scale, f32_bld.vec_type, ""); + rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, ""); + /* + * this magic add will shift out the mantissa bits to the right position (if + * the number is actually smaller than denorm_scale). 
+ */ + denorm = lp_build_add(&f32_bld, rescale_src, denorm_scale); + denorm = LLVMBuildBitCast(builder, denorm, i32_bld.vec_type, ""); + denorm = lp_build_and(&i32_bld, denorm, mantissa_mask); + normal = lp_build_select(&i32_bld, is_denorm_or_zero, denorm, normal); + } /* * handle nan/inf cases @@ -344,7 +377,7 @@ lp_build_smallfloat_to_float(struct gallivm_state *gallivm, /* for normals, Infs, Nans fix up exponent */ exp_adj = lp_build_const_int_vec(gallivm, i32_type, - (127 - ((1 << (exponent_bits - 1)) - 1)) << 23); + (127 - ((1 << (exponent_bits - 1)) - 1)) << 23); normal = lp_build_add(&i32_bld, srcabs, exp_adj); tmp = lp_build_and(&i32_bld, wasinfnan, i32_floatexpmask); normal = lp_build_or(&i32_bld, tmp, normal); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 4f45fafba9f..eb8b02ef9d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -2481,7 +2481,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, const bool is_1d = variant->key.resource_1d; const unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs; - LLVMValueRef fpstate = NULL; LLVMTypeRef fs_vec_type = lp_build_vec_type(gallivm, fs_type); @@ -2490,23 +2489,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, lp_blend_type_from_format_desc(out_format_desc, &row_type); lp_mem_type_from_format_desc(out_format_desc, &dst_type); - /* - * Technically this code should go into lp_build_smallfloat_to_float - * and lp_build_float_to_smallfloat but due to the - * http://llvm.org/bugs/show_bug.cgi?id=6393 - * llvm reorders the mxcsr intrinsics in a way that breaks the code. - * So the ordering is important here and there shouldn't be any - * llvm ir instrunctions in this function before - * this, otherwise half-float format conversions won't work - * (again due to llvm bug #6393). 
- */ - if (have_smallfloat_format(dst_type, out_format)) { - /* We need to make sure that denorms are ok for half float - conversions */ - fpstate = lp_build_fpstate_get(gallivm); - lp_build_fpstate_set_denorms_zero(gallivm, false); - } - struct lp_type mask_type = lp_int32_vec4_type(); mask_type.length = fs_type.length; @@ -3129,10 +3111,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, if (do_branch) { lp_build_mask_end(&mask_ctx); } - - if (fpstate) { - lp_build_fpstate_set(gallivm, fpstate); - } }