mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
swr/rast: Fix 64bit float loads in x86 lowering pass
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
1ffbbbee97
commit
96ad8f5a23
2 changed files with 25 additions and 45 deletions
|
|
@@ -201,44 +201,7 @@ namespace SwrJit
|
|||
/// @param scale - value to scale indices by
|
||||
Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
|
||||
{
|
||||
Value* vGather;
|
||||
|
||||
// use avx2 gather instruction if available
|
||||
if (JM()->mArch.AVX2())
|
||||
{
|
||||
vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth / 2)), VectorType::get(mDoubleTy, mVWidth / 2));
|
||||
vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
|
||||
}
|
||||
else
|
||||
{
|
||||
Value* pStack = STACKSAVE();
|
||||
|
||||
// store vSrc on the stack. this way we can select between a valid load address and the vSrc address
|
||||
Value* vSrcPtr = ALLOCA(vSrc->getType());
|
||||
SetTempAlloca(vSrcPtr);
|
||||
STORE(vSrc, vSrcPtr);
|
||||
|
||||
vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
|
||||
Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale));
|
||||
Value *vOffsets = MUL(vIndices, vScaleVec);
|
||||
for (uint32_t i = 0; i < mVWidth / 2; ++i)
|
||||
{
|
||||
// single component byte index
|
||||
Value *offset = VEXTRACT(vOffsets, C(i));
|
||||
// byte pointer to component
|
||||
Value *loadAddress = GEP(pBase, offset);
|
||||
loadAddress = BITCAST(loadAddress, PointerType::get(mDoubleTy, 0));
|
||||
// pointer to the value to load if we're masking off a component
|
||||
Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
|
||||
Value *selMask = VEXTRACT(vMask, C(i));
|
||||
// switch in a safe address to load if we're trying to access a vertex
|
||||
Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
|
||||
Value *val = LOAD(validAddress);
|
||||
vGather = VINSERT(vGather, val, C(i));
|
||||
}
|
||||
STACKRESTORE(pStack);
|
||||
}
|
||||
return vGather;
|
||||
return VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
|
|
@@ -230,7 +230,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
}
|
||||
|
||||
// Fetch attributes from memory and output to a simdvertex struct
|
||||
// since VGATHER has a perf penalty on HSW vs BDW, allow client to choose which fetch method to use
|
||||
JitGatherVertices(fetchState, streams, vIndices, pVtxOut);
|
||||
|
||||
RET_VOID();
|
||||
|
|
@@ -763,13 +762,31 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
// if we need to gather the component
|
||||
if (compCtrl[i] == StoreSrc)
|
||||
{
|
||||
Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3}));
|
||||
Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7}));
|
||||
Value* vShufLo;
|
||||
Value* vShufHi;
|
||||
Value* vShufAll;
|
||||
|
||||
Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0));
|
||||
Value *vOffsetsHi = VEXTRACTI128(vOffsets, C(1));
|
||||
if (mVWidth == 8)
|
||||
{
|
||||
vShufLo = C({ 0, 1, 2, 3 });
|
||||
vShufHi = C({ 4, 5, 6, 7 });
|
||||
vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
|
||||
}
|
||||
else
|
||||
{
|
||||
SWR_ASSERT(mVWidth == 16);
|
||||
vShufLo = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
|
||||
vShufHi = C({ 8, 9, 10, 11, 12, 13, 14, 15 });
|
||||
vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
|
||||
}
|
||||
|
||||
Value *vZeroDouble = VECTOR_SPLAT(4, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
|
||||
Value *vMaskLo = VSHUFFLE(vGatherMask, vGatherMask, vShufLo);
|
||||
Value *vMaskHi = VSHUFFLE(vGatherMask, vGatherMask, vShufHi);
|
||||
|
||||
Value *vOffsetsLo = VSHUFFLE(vOffsets, vOffsets, vShufLo);
|
||||
Value *vOffsetsHi = VSHUFFLE(vOffsets, vOffsets, vShufHi);
|
||||
|
||||
Value *vZeroDouble = VECTOR_SPLAT(mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
|
||||
|
||||
Value* pGatherLo = GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo);
|
||||
Value* pGatherHi = GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi);
|
||||
|
|
@@ -777,7 +794,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
pGatherLo = VCVTPD2PS(pGatherLo);
|
||||
pGatherHi = VCVTPD2PS(pGatherHi);
|
||||
|
||||
Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, C({0, 1, 2, 3, 4, 5, 6, 7}));
|
||||
Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, vShufAll);
|
||||
|
||||
vVertexElements[currentVertexElement++] = pGather;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue