mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-15 13:10:31 +01:00
llvmpipe: fix blending with half-float formats
The fact that we flush denorms to zero breaks our half-float conversion and blending. This patch enables denorms for blending. It's a little tricky due to the LLVM bug that makes it incorrectly reorder the mxcsr intrinsics: http://llvm.org/bugs/show_bug.cgi?id=6393 Signed-off-by: Zack Rusin <zackr@vmware.com> Reviewed-by: José Fonseca <jfonseca@vmware.com> Reviewed-by: Roland Scheidegger <sroland@vmware.com> Signed-off-by: Zack Rusin <zackr@vmware.com>
This commit is contained in:
parent
1e71493afa
commit
155139059b
3 changed files with 108 additions and 5 deletions
|
|
@ -64,6 +64,17 @@
|
|||
#include "lp_bld_arit.h"
|
||||
#include "lp_bld_flow.h"
|
||||
|
||||
#if defined(PIPE_ARCH_SSE)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifndef _MM_DENORMALS_ZERO_MASK
|
||||
#define _MM_DENORMALS_ZERO_MASK 0x0040
|
||||
#endif
|
||||
|
||||
#ifndef _MM_FLUSH_ZERO_MASK
|
||||
#define _MM_FLUSH_ZERO_MASK 0x8000
|
||||
#endif
|
||||
|
||||
#define EXP_POLY_DEGREE 5
|
||||
|
||||
|
|
@ -3489,3 +3500,63 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
|
|||
return ret;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_fpstate_get(struct gallivm_state *gallivm)
|
||||
{
|
||||
if (util_cpu_caps.has_sse) {
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef mxcsr_ptr = lp_build_alloca(
|
||||
gallivm,
|
||||
LLVMInt32TypeInContext(gallivm->context),
|
||||
"mxcsr_ptr");
|
||||
lp_build_intrinsic(builder,
|
||||
"llvm.x86.sse.stmxcsr",
|
||||
LLVMVoidTypeInContext(gallivm->context),
|
||||
&mxcsr_ptr, 1);
|
||||
return mxcsr_ptr;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
|
||||
boolean zero)
|
||||
{
|
||||
if (util_cpu_caps.has_sse) {
|
||||
/* turn on DAZ (64) | FTZ (32768) = 32832 if available */
|
||||
int daz_ftz = _MM_FLUSH_ZERO_MASK;
|
||||
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm);
|
||||
LLVMValueRef mxcsr =
|
||||
LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
|
||||
|
||||
if (util_cpu_caps.has_daz) {
|
||||
/* Enable denormals are zero mode */
|
||||
daz_ftz |= _MM_DENORMALS_ZERO_MASK;
|
||||
}
|
||||
if (zero) {
|
||||
mxcsr = LLVMBuildOr(builder, mxcsr,
|
||||
LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), "");
|
||||
} else {
|
||||
mxcsr = LLVMBuildAnd(builder, mxcsr,
|
||||
LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), "");
|
||||
}
|
||||
|
||||
LLVMBuildStore(builder, mxcsr, mxcsr_ptr);
|
||||
lp_build_fpstate_set(gallivm, mxcsr_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lp_build_fpstate_set(struct gallivm_state *gallivm,
|
||||
LLVMValueRef mxcsr_ptr)
|
||||
{
|
||||
if (util_cpu_caps.has_sse) {
|
||||
lp_build_intrinsic(gallivm->builder,
|
||||
"llvm.x86.sse.ldmxcsr",
|
||||
LLVMVoidTypeInContext(gallivm->context),
|
||||
&mxcsr_ptr, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -358,4 +358,15 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
|
|||
const struct lp_type type,
|
||||
LLVMValueRef x);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_fpstate_get(struct gallivm_state *gallivm);
|
||||
|
||||
void
|
||||
lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
|
||||
boolean zero);
|
||||
void
|
||||
lp_build_fpstate_set(struct gallivm_state *gallivm,
|
||||
LLVMValueRef mxcsr);
|
||||
|
||||
#endif /* !LP_BLD_ARIT_H */
|
||||
|
|
|
|||
|
|
@ -1554,6 +1554,28 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
|
|||
|
||||
const boolean is_1d = variant->key.resource_1d;
|
||||
unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
|
||||
LLVMValueRef fpstate = 0;
|
||||
|
||||
/* Get type from output format */
|
||||
lp_blend_type_from_format_desc(out_format_desc, &row_type);
|
||||
lp_mem_type_from_format_desc(out_format_desc, &dst_type);
|
||||
|
||||
/*
|
||||
* Technically this code should go into lp_build_smallfloat_to_float
|
||||
* and lp_build_float_to_smallfloat but due to the
|
||||
* http://llvm.org/bugs/show_bug.cgi?id=6393
|
||||
* llvm reorders the mxcsr intrinsics in a way that breaks the code.
|
||||
* So the ordering is important here and there shouldn't be any
|
||||
* llvm ir instructions in this function before
|
||||
* this, otherwise half-float format conversions won't work
|
||||
* (again due to llvm bug #6393).
|
||||
*/
|
||||
if (dst_type.floating && dst_type.width != 32) {
|
||||
/* We need to make sure that denorms are ok for half float
|
||||
conversions */
|
||||
fpstate = lp_build_fpstate_get(gallivm);
|
||||
lp_build_fpstate_set_denorms_zero(gallivm, FALSE);
|
||||
}
|
||||
|
||||
mask_type = lp_int32_vec4_type();
|
||||
mask_type.length = fs_type.length;
|
||||
|
|
@ -1587,11 +1609,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
|
|||
undef_src_val = lp_build_undef(gallivm, fs_type);
|
||||
#endif
|
||||
|
||||
|
||||
/* Get type from output format */
|
||||
lp_blend_type_from_format_desc(out_format_desc, &row_type);
|
||||
lp_mem_type_from_format_desc(out_format_desc, &dst_type);
|
||||
|
||||
row_type.length = fs_type.length;
|
||||
vector_width = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width;
|
||||
|
||||
|
|
@ -2051,6 +2068,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
|
|||
dst, dst_type, dst_count, dst_alignment);
|
||||
}
|
||||
|
||||
if (dst_type.floating && dst_type.width != 32) {
|
||||
lp_build_fpstate_set(gallivm, fpstate);
|
||||
}
|
||||
|
||||
if (do_branch) {
|
||||
lp_build_mask_end(&mask_ctx);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue