mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-23 23:30:22 +01:00
swr: [rasterizer jitter] vpermps support
This commit is contained in:
parent
bfb954189e
commit
aca5513184
4 changed files with 84 additions and 1 deletions
|
|
@ -115,6 +115,30 @@ __m256i func(__m256i a, __m256i b)\
|
|||
}
|
||||
|
||||
#if (KNOB_ARCH == KNOB_ARCH_AVX)
|
||||
INLINE
|
||||
__m256 _simdemu_permute_ps(__m256 a, __m256i b)
|
||||
{
|
||||
__m128 aHi = _mm256_extractf128_ps(a, 1);
|
||||
__m128i bHi = _mm256_extractf128_si256(b, 1);
|
||||
__m128 aLo = _mm256_castps256_ps128(a);
|
||||
__m128i bLo = _mm256_castsi256_si128(b);
|
||||
|
||||
__m128i indexHi = _mm_cmpgt_epi32(bLo, _mm_set1_epi32(3));
|
||||
__m128 resLow = _mm_permutevar_ps(aLo, _mm_and_si128(bLo, _mm_set1_epi32(0x3)));
|
||||
__m128 resHi = _mm_permutevar_ps(aHi, _mm_and_si128(bLo, _mm_set1_epi32(0x3)));
|
||||
__m128 blendLowRes = _mm_blendv_ps(resLow, resHi, _mm_castsi128_ps(indexHi));
|
||||
|
||||
indexHi = _mm_cmpgt_epi32(bHi, _mm_set1_epi32(3));
|
||||
resLow = _mm_permutevar_ps(aLo, _mm_and_si128(bHi, _mm_set1_epi32(0x3)));
|
||||
resHi = _mm_permutevar_ps(aHi, _mm_and_si128(bHi, _mm_set1_epi32(0x3)));
|
||||
__m128 blendHiRes = _mm_blendv_ps(resLow, resHi, _mm_castsi128_ps(indexHi));
|
||||
|
||||
__m256 result = _mm256_castps128_ps256(blendLowRes);
|
||||
result = _mm256_insertf128_ps(result, blendHiRes, 1);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#define _simd_mul_epi32 _simdemu_mul_epi32
|
||||
#define _simd_mullo_epi32 _simdemu_mullo_epi32
|
||||
#define _simd_sub_epi32 _simdemu_sub_epi32
|
||||
|
|
@ -137,8 +161,11 @@ __m256i func(__m256i a, __m256i b)\
|
|||
#define _simd_cmpeq_epi64 _simdemu_cmpeq_epi64
|
||||
#define _simd_cmpgt_epi64 _simdemu_cmpgt_epi64
|
||||
#define _simd_cmpgt_epi8 _simdemu_cmpgt_epi8
|
||||
#define _simd_cmpeq_epi8 _simdemu_cmpeq_epi8
|
||||
#define _simd_cmpgt_epi16 _simdemu_cmpgt_epi16
|
||||
#define _simd_cmpeq_epi16 _simdemu_cmpeq_epi16
|
||||
#define _simd_movemask_epi8 _simdemu_movemask_epi8
|
||||
#define _simd_permute_ps _simdemu_permute_ps
|
||||
|
||||
SIMD_EMU_EPI(_simdemu_mul_epi32, _mm_mul_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_mullo_epi32, _mm_mullo_epi32)
|
||||
|
|
@ -161,7 +188,9 @@ SIMD_EMU_EPI(_simdemu_add_epi8, _mm_add_epi8)
|
|||
SIMD_EMU_EPI(_simdemu_cmpeq_epi64, _mm_cmpeq_epi64)
|
||||
SIMD_EMU_EPI(_simdemu_cmpgt_epi64, _mm_cmpgt_epi64)
|
||||
SIMD_EMU_EPI(_simdemu_cmpgt_epi8, _mm_cmpgt_epi8)
|
||||
SIMD_EMU_EPI(_simdemu_cmpeq_epi8, _mm_cmpeq_epi8)
|
||||
SIMD_EMU_EPI(_simdemu_cmpgt_epi16, _mm_cmpgt_epi16)
|
||||
SIMD_EMU_EPI(_simdemu_cmpeq_epi16, _mm_cmpeq_epi16)
|
||||
|
||||
#define _simd_unpacklo_epi32(a, b) _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)))
|
||||
#define _simd_unpackhi_epi32(a, b) _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)))
|
||||
|
|
@ -300,8 +329,11 @@ int _simdemu_movemask_epi8(__m256i a)
|
|||
#define _simd_cmpeq_epi64 _mm256_cmpeq_epi64
|
||||
#define _simd_cmpgt_epi64 _mm256_cmpgt_epi64
|
||||
#define _simd_cmpgt_epi8 _mm256_cmpgt_epi8
|
||||
#define _simd_cmpeq_epi8 _mm256_cmpeq_epi8
|
||||
#define _simd_cmpgt_epi16 _mm256_cmpgt_epi16
|
||||
#define _simd_cmpeq_epi16 _mm256_cmpeq_epi16
|
||||
#define _simd_movemask_epi8 _mm256_movemask_epi8
|
||||
#define _simd_permute_ps _mm256_permutevar8x32_ps
|
||||
#endif
|
||||
|
||||
#define _simd_shuffleps_epi32(vA, vB, imm) _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(vA), _mm256_castsi256_ps(vB), imm))
|
||||
|
|
|
|||
|
|
@ -776,11 +776,60 @@ Value *Builder::PERMD(Value* a, Value* idx)
|
|||
}
|
||||
else
|
||||
{
|
||||
res = VSHUFFLE(a, a, idx);
|
||||
if (isa<Constant>(idx))
|
||||
{
|
||||
res = VSHUFFLE(a, a, idx);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = VUNDEF_I();
|
||||
for (uint32_t l = 0; l < JM()->mVWidth; ++l)
|
||||
{
|
||||
Value* pIndex = VEXTRACT(idx, C(l));
|
||||
Value* pVal = VEXTRACT(a, pIndex);
|
||||
res = VINSERT(res, pVal, C(l));
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Generate a VPERMPS operation (shuffle 32 bit float values
|
||||
/// across 128 bit lanes) in LLVM IR. If not supported on the underlying
|
||||
/// platform, emulate it
|
||||
/// @param a - 256bit SIMD lane(8x32bit) of float values.
|
||||
/// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
|
||||
Value *Builder::PERMPS(Value* a, Value* idx)
|
||||
{
|
||||
Value* res;
|
||||
// use avx2 permute instruction if available
|
||||
if (JM()->mArch.AVX2())
|
||||
{
|
||||
// llvm 3.6.0 swapped the order of the args to vpermd
|
||||
res = VPERMPS(idx, a);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isa<Constant>(idx))
|
||||
{
|
||||
res = VSHUFFLE(a, a, idx);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = VUNDEF_F();
|
||||
for (uint32_t l = 0; l < JM()->mVWidth; ++l)
|
||||
{
|
||||
Value* pIndex = VEXTRACT(idx, C(l));
|
||||
Value* pVal = VEXTRACT(a, pIndex);
|
||||
res = VINSERT(res, pVal, C(l));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
|
||||
/// in LLVM IR. If not supported on the underlying platform, emulate it
|
||||
|
|
|
|||
|
|
@ -115,6 +115,7 @@ Value *PSHUFB(Value* a, Value* b);
|
|||
Value *PMOVSXBD(Value* a);
|
||||
Value *PMOVSXWD(Value* a);
|
||||
Value *PERMD(Value* a, Value* idx);
|
||||
Value *PERMPS(Value* a, Value* idx);
|
||||
Value *CVTPH2PS(Value* a);
|
||||
Value *CVTPS2PH(Value* a, Value* rounding);
|
||||
Value *PMAXSD(Value* a, Value* b);
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ intrinsics = [
|
|||
["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components
|
||||
["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components
|
||||
["VPERMD", "x86_avx2_permd", ["idx", "a"]],
|
||||
["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
|
||||
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
|
||||
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
|
||||
["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue