util: try to use SSE instructions with MSVC and 32-bit gcc

The lrint() and lrintf() functions are pretty slow and make some
texture transfers very inefficient.  This patch makes a better effort
at using SSE intrinsics instead for 32-bit gcc and MSVC.

Note, this patch doesn't address the use of SSE4.1 with MSVC.

v2: get rid of the ROUND_WITH_SSE symbol, per Matt.

Reviewed-by: José Fonseca <jfonseca@vmware.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Brian Paul 2016-08-17 10:41:30 -06:00
parent 18e6e0796a
commit 5de29aeef0

View file

@@ -29,7 +29,8 @@
#include <limits.h>
#include <stdint.h>
#ifdef __x86_64__
#if defined(__SSE__) || defined(_MSC_VER)
/* MSVC always has SSE nowadays */
#include <xmmintrin.h>
#include <emmintrin.h>
#endif
@@ -95,7 +96,7 @@ _mesa_roundeven(double x)
static inline long
_mesa_lroundevenf(float x)
{
#ifdef __x86_64__
#if defined(__SSE__) || defined(_MSC_VER)
#if LONG_MAX == INT64_MAX
return _mm_cvtss_si64(_mm_load_ss(&x));
#elif LONG_MAX == INT32_MAX
@@ -115,7 +116,7 @@ _mesa_lroundevenf(float x)
static inline long
_mesa_lroundeven(double x)
{
#ifdef __x86_64__
#if defined(__SSE__) || defined(_MSC_VER)
#if LONG_MAX == INT64_MAX
return _mm_cvtsd_si64(_mm_load_sd(&x));
#elif LONG_MAX == INT32_MAX