util/u_math: Use xmmintrin.h whenever possible.

It seems __builtin_ia32_ldmxcsr is only available on gcc and only when
-msse is used. xmmintrin.h/pmmintrin.h provide portable intrinsics, but
these too are only available with gcc when -msse/-msse3 are set.

scons build always sets -msse on x86 builds, but autotools doesn't seem
to.

We could try to get this working on gcc x86 without -msse by emitting
assembly, but I believe that in this day and age we really should be
building Mesa with -msse and -msse2.
This commit is contained in:
José Fonseca 2013-07-10 07:56:17 +01:00
parent 045bf0db52
commit b042aae70d

View file

@ -27,9 +27,17 @@
#include "pipe/p_config.h"
#include "util/u_math.h"
#include "util/u_cpu_detect.h"
#if defined(PIPE_ARCH_SSE)
#include <xmmintrin.h>
/* This is defined in pmmintrin.h, but it can only be included when -msse3 is
* used, so just define it here to avoid further build requirements. */
#define _MM_DENORMALS_ZERO_MASK 0x0040
#endif
/** 2^x, for x in [-1.0, 1.0) */
float pow2_table[POW2_TABLE_SIZE];
@ -81,9 +89,9 @@ util_fpstate_get(void)
{
unsigned mxcsr = 0;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#if defined(PIPE_ARCH_SSE)
if (util_cpu_caps.has_sse) {
mxcsr = __builtin_ia32_stmxcsr();
mxcsr = _mm_getcsr();
}
#endif
@ -99,13 +107,13 @@ util_fpstate_get(void)
unsigned
util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
{
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */
#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */
#if defined(PIPE_ARCH_SSE)
if (util_cpu_caps.has_sse) {
current_mxcsr |= MXCSR_FTZ;
/* Enable flush to zero mode */
current_mxcsr |= _MM_FLUSH_ZERO_MASK;
if (util_cpu_caps.has_sse3) {
current_mxcsr |= MXCSR_DAZ;
/* Enable denormals are zero mode */
current_mxcsr |= _MM_DENORMALS_ZERO_MASK;
}
util_fpstate_set(current_mxcsr);
}
@ -121,9 +129,9 @@ util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
void
util_fpstate_set(unsigned mxcsr)
{
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#if defined(PIPE_ARCH_SSE)
if (util_cpu_caps.has_sse) {
__builtin_ia32_ldmxcsr(mxcsr);
_mm_setcsr(mxcsr);
}
#endif
}