swr/rast: simdlib changes for clang/gcc

Tested with clang-4.0 and gcc-6.3.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-07-18 12:04:41 -05:00
parent c27cbd88e6
commit 6970f48b6e
2 changed files with 35 additions and 10 deletions

View file

@ -24,6 +24,21 @@
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
#if defined(__GNUC__) && !defined( __clang__) && !defined(__INTEL_COMPILER)
// gcc missing these intrinsics
#ifndef _mm512_cmpneq_ps_mask
#define _mm512_cmpneq_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_NEQ_UQ)
#endif
#ifndef _mm512_cmplt_ps_mask
#define _mm512_cmplt_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_LT_OS)
#endif
#ifndef _mm512_cmplt_pd_mask
#define _mm512_cmplt_pd_mask(a,b) _mm512_cmp_pd_mask((a),(b),_CMP_LT_OS)
#endif
#endif
//============================================================================
// SIMD16 AVX512 (F) implementation
//
@ -138,6 +153,17 @@ using SIMD256T = SIMD256Impl::AVX2Impl;
}
#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
#define SIMD_EMU_IWRAPPER_2(op) \
static SIMDINLINE \
Integer SIMDCALL op(Integer a, Integer b)\
{\
return Integer\
{\
SIMD256T::op(a.v8[0], b.v8[0]),\
SIMD256T::op(a.v8[1], b.v8[1]),\
};\
}
private:
static SIMDINLINE Integer vmask(__mmask8 m)
{
@ -234,14 +260,6 @@ SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
SIMD_IWRAPPER_2(sllv_epi32);
SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
SIMD_IWRAPPER_1I_(srli_si, srli_si512); // return a >> (ImmT*8) (uint)
template<int ImmT> // same as srli_si, but with Float cast to int
static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
{
return castsi_ps(srli_si<ImmT>(castps_si(a)));
}
SIMD_IWRAPPER_2(srlv_epi32);
//-----------------------------------------------------------------------
@ -443,10 +461,17 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b)
return _mm512_inserti64x4(a, b, imm);
}
#if !defined(AVX512F_STRICT)
SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16
SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32
SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16
SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32
#else
SIMD_EMU_IWRAPPER_2(packs_epi16)
SIMD_EMU_IWRAPPER_2(packs_epi32)
SIMD_EMU_IWRAPPER_2(packus_epi16)
SIMD_EMU_IWRAPPER_2(packus_epi32)
#endif
static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
@ -679,4 +704,4 @@ static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
#undef SIMD_IWRAPPER_2
#undef SIMD_IWRAPPER_2_
#undef SIMD_IWRAPPER_2I
#undef SIMD_EMU_IWRAPPER_2

View file

@ -262,7 +262,7 @@ namespace SIMDImpl
namespace SIMD512Impl
{
#if !defined(_MM_K0_REG)
#if !defined(__AVX512F__)
// Define AVX512 types if not included via immintrin.h.
// All data members of these types are ONLY to viewed
// in a debugger. Do NOT access them via code!