mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
swr/rast: Replace x86 VMOVMSK with llvm-only implementation
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
0899122c03
commit
99fe90722d
5 changed files with 26 additions and 5 deletions
|
|
@ -58,7 +58,6 @@ intrinsics = [
|
|||
['VPTESTC', ['a', 'b'], 'mInt32Ty'],
|
||||
['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
|
||||
['VFMADDPS', ['a', 'b', 'c'], 'a'],
|
||||
['VMOVMSKPS', ['a'], 'mInt32Ty'],
|
||||
['VPHADDD', ['a', 'b'], 'a'],
|
||||
['PDEP32', ['a', 'b'], 'a'],
|
||||
['RDTSC', [], 'mInt64Ty'],
|
||||
|
|
|
|||
|
|
@ -608,7 +608,7 @@ namespace SwrJit
|
|||
pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
|
||||
pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));
|
||||
|
||||
Value* pMask = VMOVMSKPS(BITCAST(vMask, mSimdFP32Ty));
|
||||
Value* pMask = VMOVMSK(vMask);
|
||||
|
||||
// Setup loop basic block
|
||||
BasicBlock* pLoop = BasicBlock::Create(mpJitMgr->mContext, "Scatter_Loop", pFunc);
|
||||
|
|
|
|||
|
|
@ -525,6 +525,28 @@ namespace SwrJit
|
|||
return S_EXT(mask, mSimd16Int32Ty);
|
||||
}
|
||||
|
||||
/// @brief Convert <Nxi1> llvm mask to integer
|
||||
Value *Builder::VMOVMSK(Value* mask)
|
||||
{
|
||||
SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
|
||||
uint32_t numLanes = mask->getType()->getVectorNumElements();
|
||||
Value* i32Result;
|
||||
if (numLanes == 8)
|
||||
{
|
||||
i32Result = BITCAST(mask, mInt8Ty);
|
||||
}
|
||||
else if (numLanes == 16)
|
||||
{
|
||||
i32Result = BITCAST(mask, mInt16Ty);
|
||||
}
|
||||
else
|
||||
{
|
||||
SWR_ASSERT("Unsupported vector width");
|
||||
i32Result = BITCAST(mask, mInt8Ty);
|
||||
}
|
||||
return Z_EXT(i32Result, mInt32Ty);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Generate a VPSHUFB operation in LLVM IR. If not
|
||||
/// supported on the underlying platform, emulate it
|
||||
|
|
@ -768,8 +790,7 @@ namespace SwrJit
|
|||
/// @brief pop count on vector mask (e.g. <8 x i1>)
|
||||
Value* Builder::VPOPCNT(Value* a)
|
||||
{
|
||||
Value* b = BITCAST(VMASK(a), mSimdFP32Ty);
|
||||
return POPCNT(VMOVMSKPS(b));
|
||||
return POPCNT(VMOVMSK(a));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
|
|
@ -102,6 +102,8 @@ Value *MASK_16(Value *vmask);
|
|||
Value *VMASK(Value *mask);
|
||||
Value *VMASK_16(Value *mask);
|
||||
|
||||
Value *VMOVMSK(Value *mask);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief functions that build IR to call x86 intrinsics directly, or
|
||||
/// emulate them with other instructions if not available on the host
|
||||
|
|
|
|||
|
|
@ -79,7 +79,6 @@ namespace SwrJit
|
|||
{"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
|
||||
{"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
|
||||
{"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
|
||||
{"meta.intrinsic.VMOVMSKPS", Intrinsic::x86_avx_movmsk_ps_256},
|
||||
{"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
|
||||
{"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
|
||||
{"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue