swr: [rasterizer] Interpolation utility functions

v2: use _mm_cmpunord_ps for vIsNaN

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2016-04-05 16:33:02 -06:00
parent 27cc5924ea
commit ef293ee9c0
3 changed files with 55 additions and 6 deletions

View file

@ -914,17 +914,26 @@ INLINE simdscalar vplaneps(simdscalar vA, simdscalar vB, simdscalar vC, simdscal
return vOut;
}
//////////////////////////////////////////////////////////////////////////
/// @brief 128-bit (__m128) form of the plane equation:
///        vA * vX + vB * vY + vC
/// @param vA - coefficient paired with vX
/// @param vB - coefficient paired with vY
/// @param vC - constant term
/// @param vX - first plane input (only read here)
/// @param vY - second plane input (only read here)
INLINE __m128 vplaneps128(__m128 vA, __m128 vB, __m128 vC, __m128 &vX, __m128 &vY)
{
    // Evaluate the inner term (vA * vX + vC) first, then fold vB * vY on top of it.
    __m128 vInner = _simd128_fmadd_ps(vA, vX, vC);
    return _simd128_fmadd_ps(vB, vY, vInner);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Interpolates a single component.
/// @param vI - barycentric I
/// @param vJ - barycentric J
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
template<UINT Attrib, UINT Comp>
template<UINT Attrib, UINT Comp, UINT numComponents = 4>
static INLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, const float *pInterpBuffer)
{
const float *pInterpA = &pInterpBuffer[Attrib * 12 + 0 + Comp];
const float *pInterpB = &pInterpBuffer[Attrib * 12 + 4 + Comp];
const float *pInterpC = &pInterpBuffer[Attrib * 12 + 8 + Comp];
const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
simdscalar vA = _simd_broadcast_ss(pInterpA);
simdscalar vB = _simd_broadcast_ss(pInterpB);
@ -936,6 +945,40 @@ static INLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, cons
return vplaneps(vA, vB, vC, vI, vJ);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Interpolates a single attribute component on __m128 vectors,
///        evaluating A * I + B * J + C * (1 - I - J).
/// @tparam Attrib - attribute index; each attribute spans 3 * numComponents floats
/// @tparam Comp - component index within each coefficient group
/// @tparam numComponents - floats per coefficient group (defaults to 4)
/// @param vI - barycentric I
/// @param vJ - barycentric J
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
template<UINT Attrib, UINT Comp, UINT numComponents = 4>
static INLINE __m128 InterpolateComponent(__m128 vI, __m128 vJ, const float *pInterpBuffer)
{
    // The A, B and C coefficient groups for an attribute sit back to back,
    // numComponents floats apart.
    const UINT attribBase = Attrib * 3 * numComponents;
    const UINT idxA = attribBase + 0 + Comp;
    const UINT idxB = attribBase + numComponents + Comp;
    const UINT idxC = attribBase + numComponents * 2 + Comp;

    // Splat each scalar coefficient across the whole vector.
    __m128 vCoeffA = _mm_broadcast_ss(&pInterpBuffer[idxA]);
    __m128 vCoeffB = _mm_broadcast_ss(&pInterpBuffer[idxB]);
    __m128 vCoeffC = _mm_broadcast_ss(&pInterpBuffer[idxC]);

    // Third barycentric weight: K = (1 - I) - J.
    const __m128 vOne = _mm_set1_ps(1.0f);
    const __m128 vK = _mm_sub_ps(_mm_sub_ps(vOne, vI), vJ);

    // Pre-scale C by K so the plane helper can add it as its constant term.
    __m128 vScaledC = _mm_mul_ps(vK, vCoeffC);

    return vplaneps128(vCoeffA, vCoeffB, vScaledC, vI, vJ);
}
/// @brief Absolute value of each 32-bit float lane in a __m128.
/// @param a - input vector
/// @return a with bit 31 of every 32-bit lane cleared
static INLINE __m128 _simd128_abs_ps(__m128 a)
{
    // 0x7fffffff keeps exponent and mantissa and drops the sign bit.
    const __m128i vCleared = _mm_and_si128(_mm_castps_si128(a), _mm_set1_epi32(0x7fffffff));
    return _mm_castsi128_ps(vCleared);
}
/// @brief Absolute value of each 32-bit float lane in a simdscalar.
/// @param a - input vector
/// @return a with bit 31 of every 32-bit lane cleared
static INLINE simdscalar _simd_abs_ps(simdscalar a)
{
    // 0x7fffffff keeps exponent and mantissa and drops the sign bit.
    const simdscalari vCleared = _simd_and_si(_simd_castps_si(a), _simd_set1_epi32(0x7fffffff));
    return _simd_castsi_ps(vCleared);
}
INLINE
UINT pdep_u32(UINT a, UINT mask)
{

View file

@ -307,6 +307,14 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
!state.rastState.pointSpriteEnable);
}
/// @brief Reports whether any of the four float lanes of vec is NaN.
/// @param vec - vector to inspect
/// @return true if at least one lane is NaN, false otherwise
INLINE
bool vIsNaN(const __m128& vec)
{
    // Comparing a value with itself is "unordered" only when that lane is NaN;
    // movemask gathers the per-lane results into the low bits of an int.
    return _mm_movemask_ps(_mm_cmpunord_ps(vec, vec)) != 0;
}
// Declarations only; the definitions are not visible in this view.
// NOTE(review): presumably returns how many primitives numElements elements
// form under topology `mode` - confirm against the definition.
uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
// NOTE(review): presumably returns the vertex count of one primitive of
// `topology`, with includeAdjVerts selecting whether adjacency vertices are
// counted - confirm against the definition.
uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);

View file

@ -197,8 +197,6 @@ enum SWR_OUTER_TESSFACTOR_ID
#define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
#define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
#define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
// Compile-time guard: the point-size slot index must stay below KNOB_NUM_ATTRIBUTES.
static_assert(VERTEX_POINT_SIZE_SLOT < KNOB_NUM_ATTRIBUTES, "Mismatched attribute slot size");
// SoAoSoA
struct simdvertex
{