diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c
index b0c9b7acf05..7ee1e959b6b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c
@@ -113,23 +113,56 @@ lp_build_float_to_smallfloat(struct gallivm_state *gallivm,
    i32_roundmask = lp_build_const_int_vec(gallivm, i32_type,
                                           ~((1 << (23 - mantissa_bits)) - 1) &
                                           0x7fffffff);
-   rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
    rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask);
-   rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, "");
 
-   /* bias exponent (and denormalize if necessary) */
-   magic = lp_build_const_int_vec(gallivm, i32_type,
-                                  ((1 << (exponent_bits - 1)) - 1) << 23);
-   magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
-   normal = lp_build_mul(&f32_bld, rescale_src, magic);
-
-   /* clamp to max value - largest non-infinity number */
+   /* largest non-infinity number */
    small_max = lp_build_const_int_vec(gallivm, i32_type,
-                                      (((1 << exponent_bits) - 2) << 23) |
+                                      ((127 + ((1 << (exponent_bits - 1)) - 1)) << 23) |
                                       (((1 << mantissa_bits) - 1) << (23 - mantissa_bits)));
-   small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, "");
-   normal = lp_build_min(&f32_bld, normal, small_max);
-   normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, "");
+
+   /*
+    * This code only works correctly if denormals are enabled: when the
+    * smallfloat result is a denormal, it is otherwise flushed to zero.
+    */
+   if (0) {
+      /* clamp to max value */
+      rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, "");
+      small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, "");
+      normal = lp_build_min(&f32_bld, rescale_src, small_max);
+      /* bias exponent (and denormalize if necessary) */
+      magic = lp_build_const_int_vec(gallivm, i32_type,
+                                     ((1 << (exponent_bits - 1)) - 1) << 23);
+      magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
+      normal = lp_build_mul(&f32_bld, normal, magic);
+      normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, "");
+   }
+   else {
+      LLVMValueRef exp_adj, denorm_scale, is_denorm_or_zero, denorm;
+      /* clamp to max value */
+      normal = lp_build_min(&i32_bld, rescale_src, small_max);
+      exp_adj = lp_build_const_int_vec(gallivm, i32_type,
+                                       (127 - ((1 << (exponent_bits - 1)) - 1)) << 23);
+      normal = lp_build_sub(&i32_bld, normal, exp_adj);
+      LLVMValueRef mantissa_mask = lp_build_not(&i32_bld,
+                                                lp_build_const_int_vec(gallivm, i32_type, 0xFF800000));
+      is_denorm_or_zero = lp_build_cmp(&i32_bld, PIPE_FUNC_LEQUAL, normal, mantissa_mask);
+      /*
+       * for smallfloat denormals, do magic scaling so the mantissa bits can
+       * be extracted directly. denorm_scale is just the smallest normal number.
+       */
+      denorm_scale = lp_build_const_int_vec(gallivm, i32_type,
+                                            (127 - ((1 << (exponent_bits - 1)) - 2)) << 23);
+      denorm_scale = LLVMBuildBitCast(builder, denorm_scale, f32_bld.vec_type, "");
+      rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, "");
+      /*
+       * this magic add will shift out the mantissa bits to the right position (if
+       * the number is actually smaller than denorm_scale).
+       */
+      denorm = lp_build_add(&f32_bld, rescale_src, denorm_scale);
+      denorm = LLVMBuildBitCast(builder, denorm, i32_bld.vec_type, "");
+      denorm = lp_build_and(&i32_bld, denorm, mantissa_mask);
+      normal = lp_build_select(&i32_bld, is_denorm_or_zero, denorm, normal);
+   }
 
    /*
     * handle nan/inf cases
@@ -344,7 +377,7 @@ lp_build_smallfloat_to_float(struct gallivm_state *gallivm,
 
       /* for normals, Infs, Nans fix up exponent */
       exp_adj = lp_build_const_int_vec(gallivm, i32_type,
-                                      (127 - ((1 << (exponent_bits - 1)) - 1)) << 23);
+                                       (127 - ((1 << (exponent_bits - 1)) - 1)) << 23);
       normal = lp_build_add(&i32_bld, srcabs, exp_adj);
       tmp = lp_build_and(&i32_bld, wasinfnan, i32_floatexpmask);
       normal = lp_build_or(&i32_bld, tmp, normal);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 4f45fafba9f..eb8b02ef9d1 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -2481,7 +2481,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
    const bool is_1d = variant->key.resource_1d;
    const unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
-   LLVMValueRef fpstate = NULL;
 
    LLVMTypeRef fs_vec_type = lp_build_vec_type(gallivm, fs_type);
 
@@ -2490,23 +2489,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    lp_blend_type_from_format_desc(out_format_desc, &row_type);
    lp_mem_type_from_format_desc(out_format_desc, &dst_type);
 
-   /*
-    * Technically this code should go into lp_build_smallfloat_to_float
-    * and lp_build_float_to_smallfloat but due to the
-    * http://llvm.org/bugs/show_bug.cgi?id=6393
-    * llvm reorders the mxcsr intrinsics in a way that breaks the code.
-    * So the ordering is important here and there shouldn't be any
-    * llvm ir instrunctions in this function before
-    * this, otherwise half-float format conversions won't work
-    * (again due to llvm bug #6393).
-    */
-   if (have_smallfloat_format(dst_type, out_format)) {
-      /* We need to make sure that denorms are ok for half float
-         conversions */
-      fpstate = lp_build_fpstate_get(gallivm);
-      lp_build_fpstate_set_denorms_zero(gallivm, false);
-   }
-
    struct lp_type mask_type = lp_int32_vec4_type();
    mask_type.length = fs_type.length;
 
@@ -3129,10 +3111,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    if (do_branch) {
       lp_build_mask_end(&mask_ctx);
    }
-
-   if (fpstate) {
-      lp_build_fpstate_set(gallivm, fpstate);
-   }
 }