mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 03:38:06 +02:00
swr/rast: Change gfx pointers to gfxptr_t
Change the type of the index buffer pointers to gfxptr_t, and make the related changes to the fetch and memory builder code. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
71239478d3
commit
81371a5909
11 changed files with 220 additions and 103 deletions
|
|
@ -162,7 +162,9 @@ def parse_ir_builder(input_file):
|
|||
if (func_name == 'CreateInsertNUWNSWBinOp' or
|
||||
func_name == 'CreateMaskedIntrinsic' or
|
||||
func_name == 'CreateAlignmentAssumptionHelper' or
|
||||
func_name == 'CreateLoad'):
|
||||
func_name == 'CreateGEP' or
|
||||
func_name == 'CreateLoad' or
|
||||
func_name == 'CreateMaskedLoad'):
|
||||
ignore = True
|
||||
|
||||
# Convert CamelCase to CAMEL_CASE
|
||||
|
|
|
|||
|
|
@ -1321,8 +1321,8 @@ void DrawIndexedInstance(
|
|||
}
|
||||
|
||||
int draw = 0;
|
||||
uint8_t *pIB = (uint8_t*)pState->indexBuffer.pIndices;
|
||||
pIB += (uint64_t)indexOffset * (uint64_t)indexSize;
|
||||
gfxptr_t xpIB = pState->indexBuffer.xpIndices;
|
||||
xpIB += (uint64_t)indexOffset * (uint64_t)indexSize;
|
||||
|
||||
pState->topology = topology;
|
||||
pState->forceFront = false;
|
||||
|
|
@ -1360,7 +1360,7 @@ void DrawIndexedInstance(
|
|||
pDC->pState->pfnProcessPrims != nullptr);
|
||||
pDC->FeWork.desc.draw.pDC = pDC;
|
||||
pDC->FeWork.desc.draw.numIndices = numIndicesForDraw;
|
||||
pDC->FeWork.desc.draw.pIB = (int*)pIB;
|
||||
pDC->FeWork.desc.draw.xpIB = xpIB;
|
||||
pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
|
||||
|
||||
pDC->FeWork.desc.draw.numInstances = numInstances;
|
||||
|
|
@ -1376,7 +1376,7 @@ void DrawIndexedInstance(
|
|||
AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndicesForDraw, indexOffset, baseVertex,
|
||||
numInstances, startInstance, pState->tsState.tsEnable, pState->gsState.gsEnable, pState->soState.soEnable, pState->gsState.outputTopology, draw));
|
||||
|
||||
pIB += maxIndicesPerDraw * indexSize;
|
||||
xpIB += maxIndicesPerDraw * indexSize;
|
||||
remainingIndices -= numIndicesForDraw;
|
||||
draw++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ struct DRAW_WORK
|
|||
};
|
||||
union
|
||||
{
|
||||
const int32_t* pIB; // DrawIndexed: App supplied indices
|
||||
gfxptr_t xpIB; // DrawIndexed: App supplied int32 indices
|
||||
uint32_t startVertex; // Draw: Starting vertex in VB to render from.
|
||||
};
|
||||
int32_t baseVertex;
|
||||
|
|
|
|||
|
|
@ -1527,28 +1527,24 @@ void ProcessDraw(
|
|||
uint32_t indexSize = 0;
|
||||
uint32_t endVertex = work.numVerts;
|
||||
|
||||
const int32_t* pLastRequestedIndex = nullptr;
|
||||
gfxptr_t xpLastRequestedIndex = 0;
|
||||
if (IsIndexedT::value)
|
||||
{
|
||||
switch (work.type)
|
||||
{
|
||||
case R32_UINT:
|
||||
indexSize = sizeof(uint32_t);
|
||||
pLastRequestedIndex = &(work.pIB[endVertex]);
|
||||
break;
|
||||
case R16_UINT:
|
||||
indexSize = sizeof(uint16_t);
|
||||
// nasty address offset to last index
|
||||
pLastRequestedIndex = (int32_t*)(&(((uint16_t*)work.pIB)[endVertex]));
|
||||
break;
|
||||
case R8_UINT:
|
||||
indexSize = sizeof(uint8_t);
|
||||
// nasty address offset to last index
|
||||
pLastRequestedIndex = (int32_t*)(&(((uint8_t*)work.pIB)[endVertex]));
|
||||
break;
|
||||
default:
|
||||
SWR_INVALID("Invalid work.type: %d", work.type);
|
||||
}
|
||||
xpLastRequestedIndex = work.xpIB + endVertex * indexSize;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1660,10 +1656,10 @@ void ProcessDraw(
|
|||
|
||||
// if the entire index buffer isn't being consumed, set the last index
|
||||
// so that fetches < a SIMD wide will be masked off
|
||||
fetchInfo_lo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
|
||||
if (pLastRequestedIndex < fetchInfo_lo.pLastIndex)
|
||||
fetchInfo_lo.xpLastIndex = state.indexBuffer.xpIndices + state.indexBuffer.size;
|
||||
if (xpLastRequestedIndex < fetchInfo_lo.xpLastIndex)
|
||||
{
|
||||
fetchInfo_lo.pLastIndex = pLastRequestedIndex;
|
||||
fetchInfo_lo.xpLastIndex = xpLastRequestedIndex;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -1683,15 +1679,15 @@ void ProcessDraw(
|
|||
|
||||
if (IsIndexedT::value)
|
||||
{
|
||||
fetchInfo_lo.pIndices = work.pIB;
|
||||
fetchInfo_hi.pIndices = (int32_t *)((uint8_t *)fetchInfo_lo.pIndices + KNOB_SIMD_WIDTH * indexSize); // 1/2 of KNOB_SIMD16_WIDTH
|
||||
fetchInfo_lo.xpIndices = work.xpIB;
|
||||
fetchInfo_hi.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH
|
||||
}
|
||||
else
|
||||
{
|
||||
vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
|
||||
|
||||
fetchInfo_lo.pIndices = (const int32_t *)&vIndex;
|
||||
fetchInfo_hi.pIndices = (const int32_t *)&vIndex + KNOB_SIMD_WIDTH; // 1/2 of KNOB_SIMD16_WIDTH
|
||||
fetchInfo_lo.xpIndices = (gfxptr_t)&vIndex;
|
||||
fetchInfo_hi.xpIndices = (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH
|
||||
}
|
||||
|
||||
fetchInfo_lo.CurInstance = instanceNum;
|
||||
|
|
@ -1725,18 +1721,18 @@ void ProcessDraw(
|
|||
{
|
||||
if (!IsIndexedT::value)
|
||||
{
|
||||
fetchInfo_lo.pLastIndex = fetchInfo_lo.pIndices;
|
||||
fetchInfo_lo.xpLastIndex = fetchInfo_lo.xpIndices;
|
||||
uint32_t offset;
|
||||
offset = std::min(endVertex-i, (uint32_t) KNOB_SIMD16_WIDTH);
|
||||
#if USE_SIMD16_SHADERS
|
||||
offset *= 4; // convert from index to address
|
||||
fetchInfo_lo.pLastIndex += offset;
|
||||
fetchInfo_lo.xpLastIndex += offset;
|
||||
#else
|
||||
fetchInfo_lo.pLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address
|
||||
fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address
|
||||
uint32_t offset2 = std::min(offset, (uint32_t) KNOB_SIMD16_WIDTH)-KNOB_SIMD_WIDTH;
|
||||
assert(offset >= 0);
|
||||
fetchInfo_hi.pLastIndex = fetchInfo_hi.pIndices;
|
||||
fetchInfo_hi.pLastIndex += offset2 * 4; // * 4 for converting index to address
|
||||
fetchInfo_hi.xpLastIndex = fetchInfo_hi.xpIndices;
|
||||
fetchInfo_hi.xpLastIndex += offset2 * 4; // * 4 for converting index to address
|
||||
#endif
|
||||
}
|
||||
// 1. Execute FS/VS for a single SIMD.
|
||||
|
|
@ -1919,8 +1915,8 @@ void ProcessDraw(
|
|||
|
||||
if (IsIndexedT::value)
|
||||
{
|
||||
fetchInfo_lo.pIndices = (int32_t *)((uint8_t*)fetchInfo_lo.pIndices + KNOB_SIMD16_WIDTH * indexSize);
|
||||
fetchInfo_hi.pIndices = (int32_t *)((uint8_t*)fetchInfo_hi.pIndices + KNOB_SIMD16_WIDTH * indexSize);
|
||||
fetchInfo_lo.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD16_WIDTH * indexSize;
|
||||
fetchInfo_hi.xpIndices = fetchInfo_hi.xpIndices + KNOB_SIMD16_WIDTH * indexSize;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1948,9 +1944,9 @@ void ProcessDraw(
|
|||
// if the entire index buffer isn't being consumed, set the last index
|
||||
// so that fetches < a SIMD wide will be masked off
|
||||
fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
|
||||
if (pLastRequestedIndex < fetchInfo.pLastIndex)
|
||||
if (xpLastRequestedIndex < fetchInfo.pLastIndex)
|
||||
{
|
||||
fetchInfo.pLastIndex = pLastRequestedIndex;
|
||||
fetchInfo.pLastIndex = xpLastRequestedIndex;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
|||
|
|
@ -582,7 +582,7 @@ struct SWR_VERTEX_BUFFER_STATE
|
|||
|
||||
struct SWR_INDEX_BUFFER_STATE
|
||||
{
|
||||
const void *pIndices;
|
||||
gfxptr_t xpIndices;
|
||||
// Format type for indices (e.g. UINT16, UINT32, etc.)
|
||||
SWR_FORMAT format; // @llvm_enum
|
||||
uint32_t size;
|
||||
|
|
@ -598,8 +598,8 @@ struct SWR_INDEX_BUFFER_STATE
|
|||
struct SWR_FETCH_CONTEXT
|
||||
{
|
||||
const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
|
||||
const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
|
||||
const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
|
||||
gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
|
||||
gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
|
||||
uint32_t CurInstance; // IN: current instance
|
||||
uint32_t BaseVertex; // IN: base vertex
|
||||
uint32_t StartVertex; // IN: start vertex
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ namespace SwrJit
|
|||
BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) :
|
||||
Builder(pJitMgr)
|
||||
{
|
||||
mpTranslationFuncTy = nullptr;
|
||||
mpfnTranslateGfxAddress = nullptr;
|
||||
mpParamSimDC = nullptr;
|
||||
|
||||
|
|
@ -51,8 +52,7 @@ namespace SwrJit
|
|||
|
||||
void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
|
||||
{
|
||||
SWR_ASSERT(ptr->getType() == mInt64Ty, "GFX addresses must be gfxptr_t and not converted to system pointers.");
|
||||
SWR_ASSERT(usage != MEM_CLIENT_INTERNAL, "Internal memory should not go through the translation path and should not be gfxptr_t.");
|
||||
SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t.");
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -106,32 +106,94 @@ namespace SwrJit
|
|||
return ADD(base, offset);
|
||||
}
|
||||
|
||||
LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
|
||||
Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
|
||||
{
|
||||
// the 64 bit gfx pointers are not yet propagated up the stack
|
||||
// so there is some casting in here and the test for type is not yet enabled
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ptr, Idx, nullptr, Name);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
|
||||
{
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ty, Ptr, Idx, Name);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*> &indexList, Type *Ty)
|
||||
{
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ptr, indexList);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
|
||||
{
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ptr, indexList);
|
||||
}
|
||||
|
||||
Value* BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty)
|
||||
{
|
||||
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified.");
|
||||
|
||||
|
||||
// address may be coming in as 64bit int now so get the pointer
|
||||
if (Ptr->getType() == mInt64Ty)
|
||||
{
|
||||
Ptr = INT_TO_PTR(Ptr, Ty);
|
||||
}
|
||||
|
||||
return Ptr;
|
||||
}
|
||||
|
||||
LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::LOAD(Ptr, Name);
|
||||
}
|
||||
|
||||
LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
|
||||
LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::LOAD(Ptr, Name);
|
||||
}
|
||||
|
||||
LoadInst* BuilderGfxMem::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::LOAD(Ty, Ptr, Name);
|
||||
}
|
||||
|
||||
LoadInst* BuilderGfxMem::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
|
||||
LoadInst* BuilderGfxMem::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::LOAD(Ptr, isVolatile, Name);
|
||||
}
|
||||
|
||||
LoadInst *BuilderGfxMem::LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name, JIT_MEM_CLIENT usage)
|
||||
LoadInst *BuilderGfxMem::LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
return Builder::LOAD(BasePtr, offset, name);
|
||||
AssertGFXMemoryParams(BasePtr, usage);
|
||||
|
||||
// This call is just a pass through to the base class.
|
||||
// It needs to be here to compile due to the combination of virtual overrides and signature overloads.
|
||||
// It doesn't do anything meaningful because the implementation in the base class is going to call
|
||||
// another version of LOAD inside itself where the actual per offset translation will take place
|
||||
// and we can't just translate the BasePtr once, each address needs individual translation.
|
||||
return Builder::LOAD(BasePtr, offset, name, Ty, usage);
|
||||
}
|
||||
|
||||
CallInst* BuilderGfxMem::MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
|
||||
}
|
||||
|
||||
Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress)
|
||||
|
|
|
|||
|
|
@ -41,11 +41,18 @@ namespace SwrJit
|
|||
BuilderGfxMem(JitManager* pJitMgr);
|
||||
virtual ~BuilderGfxMem() {}
|
||||
|
||||
virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
|
||||
virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
|
||||
virtual Value *GEP(Value* Ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
|
||||
virtual Value *GEP(Value* Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
|
||||
|
||||
virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
|
|
@ -62,8 +69,15 @@ namespace SwrJit
|
|||
|
||||
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
|
||||
|
||||
Value* TranslationHelper(Value *Ptr, Type *Ty);
|
||||
|
||||
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
|
||||
Value* GetTranslationFunction() { return mpfnTranslateGfxAddress; }
|
||||
Value* GetParamSimDC() { return mpParamSimDC; }
|
||||
|
||||
private:
|
||||
|
||||
|
||||
FunctionType* mpTranslationFuncTy;
|
||||
Value* mpfnTranslateGfxAddress;
|
||||
Value* mpParamSimDC;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -41,7 +41,17 @@ namespace SwrJit
|
|||
SWR_ASSERT(ptr->getType() != mInt64Ty, "Address appears to be GFX access. Requires translation through BuilderGfxMem.");
|
||||
}
|
||||
|
||||
Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
|
||||
Value *Builder::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
|
||||
{
|
||||
return IRB()->CreateGEP(Ptr, Idx, Name);
|
||||
}
|
||||
|
||||
Value *Builder::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
|
||||
{
|
||||
return IRB()->CreateGEP(Ty, Ptr, Idx, Name);
|
||||
}
|
||||
|
||||
Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty)
|
||||
{
|
||||
std::vector<Value*> indices;
|
||||
for (auto i : indexList)
|
||||
|
|
@ -49,7 +59,7 @@ namespace SwrJit
|
|||
return GEPA(ptr, indices);
|
||||
}
|
||||
|
||||
Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
|
||||
Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
|
||||
{
|
||||
std::vector<Value*> indices;
|
||||
for (auto i : indexList)
|
||||
|
|
@ -57,6 +67,16 @@ namespace SwrJit
|
|||
return GEPA(ptr, indices);
|
||||
}
|
||||
|
||||
Value *Builder::GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
|
||||
{
|
||||
return IRB()->CreateGEP(Ptr, IdxList, Name);
|
||||
}
|
||||
|
||||
Value *Builder::GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
|
||||
{
|
||||
return IRB()->CreateGEP(Ty, Ptr, IdxList, Name);
|
||||
}
|
||||
|
||||
Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
|
||||
{
|
||||
std::vector<Value*> indices;
|
||||
|
|
@ -73,13 +93,13 @@ namespace SwrJit
|
|||
return IN_BOUNDS_GEP(ptr, indices);
|
||||
}
|
||||
|
||||
LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
|
||||
LoadInst* Builder::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertMemoryUsageParams(Ptr, usage);
|
||||
return IRB()->CreateLoad(Ptr, Name);
|
||||
}
|
||||
|
||||
LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
|
||||
LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertMemoryUsageParams(Ptr, usage);
|
||||
return IRB()->CreateLoad(Ptr, Name);
|
||||
|
|
@ -91,19 +111,18 @@ namespace SwrJit
|
|||
return IRB()->CreateLoad(Ty, Ptr, Name);
|
||||
}
|
||||
|
||||
LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
|
||||
LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertMemoryUsageParams(Ptr, usage);
|
||||
return IRB()->CreateLoad(Ptr, isVolatile, Name);
|
||||
}
|
||||
|
||||
LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, JIT_MEM_CLIENT usage)
|
||||
LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertMemoryUsageParams(basePtr, usage);
|
||||
std::vector<Value*> valIndices;
|
||||
for (auto i : indices)
|
||||
valIndices.push_back(C(i));
|
||||
return LOAD(GEPA(basePtr, valIndices), name);
|
||||
return Builder::LOAD(GEPA(basePtr, valIndices), name);
|
||||
}
|
||||
|
||||
LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list<Value*> &indices, const llvm::Twine& name)
|
||||
|
|
|
|||
|
|
@ -45,16 +45,27 @@ void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
|
|||
|
||||
public:
|
||||
|
||||
Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
|
||||
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
|
||||
virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
|
||||
virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
|
||||
virtual Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
|
||||
virtual Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
|
||||
|
||||
Value *GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
|
||||
Value *GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
|
||||
|
||||
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
|
||||
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
|
||||
|
||||
virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL)
|
||||
{
|
||||
return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
|
||||
}
|
||||
|
||||
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
|
||||
StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
|
||||
|
|
|
|||
|
|
@ -133,11 +133,11 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
streams->setName("pStreams");
|
||||
|
||||
// SWR_FETCH_CONTEXT::pIndices
|
||||
Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pIndices});
|
||||
Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpIndices});
|
||||
indices->setName("pIndices");
|
||||
|
||||
// SWR_FETCH_CONTEXT::pLastIndex
|
||||
Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pLastIndex});
|
||||
Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpLastIndex});
|
||||
pLastIndex->setName("pLastIndex");
|
||||
|
||||
Value* vIndices;
|
||||
|
|
@ -152,12 +152,10 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
}
|
||||
else
|
||||
{
|
||||
pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
|
||||
vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
|
||||
}
|
||||
break;
|
||||
case R16_UINT:
|
||||
indices = BITCAST(indices, Type::getInt16PtrTy(JM()->mContext, 0));
|
||||
if(fetchState.bDisableIndexOOBCheck)
|
||||
{
|
||||
vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
|
||||
|
|
@ -165,12 +163,11 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
}
|
||||
else
|
||||
{
|
||||
pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
|
||||
vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
|
||||
}
|
||||
break;
|
||||
case R32_UINT:
|
||||
(fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(BITCAST(indices, PointerType::get(mSimdInt32Ty,0)),{(uint32_t)0})
|
||||
(fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH)
|
||||
: vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
|
||||
break; // incoming type is already 32bit int
|
||||
default:
|
||||
|
|
@ -967,6 +964,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
}
|
||||
}
|
||||
|
||||
typedef void*(*PFN_TRANSLATEGFXADDRESS_FUNC)(void* pdc, gfxptr_t va);
|
||||
extern "C" void GetSimdValid8bitIndicesGfx(gfxptr_t indices, gfxptr_t lastIndex, uint32_t vWidth, PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate, void* pdc, uint32_t* outIndices);
|
||||
extern "C" void GetSimdValid16bitIndicesGfx(gfxptr_t indices, gfxptr_t lastIndex, uint32_t vWidth, PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate, void* pdc, uint32_t* outIndices);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Loads a simd of valid indices. OOB indices are set to 0
|
||||
/// *Note* have to do 16bit index checking in scalar until we have AVX-512
|
||||
|
|
@ -975,30 +976,36 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
/// @param pLastIndex - pointer to last valid index
|
||||
Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex)
|
||||
{
|
||||
// can fit 2 16 bit integers per vWidth lane
|
||||
Value* vIndices = VUNDEF_I();
|
||||
SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters.");
|
||||
|
||||
// store 0 index on stack to be used to conditionally load from if index address is OOB
|
||||
Value* pZeroIndex = ALLOCA(mInt8Ty);
|
||||
STORE(C((uint8_t)0), pZeroIndex);
|
||||
Value* vIndices = VUNDEF_I();
|
||||
|
||||
// Load a SIMD of index pointers
|
||||
for(int64_t lane = 0; lane < mVWidth; lane++)
|
||||
{
|
||||
// Calculate the address of the requested index
|
||||
Value *pIndex = GEP(pIndices, C(lane));
|
||||
// store 0 index on stack to be used to conditionally load from if index address is OOB
|
||||
Value* pZeroIndex = ALLOCA(mInt8Ty);
|
||||
STORE(C((uint8_t)0), pZeroIndex);
|
||||
|
||||
// check if the address is less than the max index,
|
||||
Value* mask = ICMP_ULT(pIndex, pLastIndex);
|
||||
// Load a SIMD of index pointers
|
||||
for (int64_t lane = 0; lane < mVWidth; lane++)
|
||||
{
|
||||
// Calculate the address of the requested index
|
||||
Value *pIndex = GEP(pIndices, C(lane), mInt8PtrTy);
|
||||
|
||||
// if valid, load the index. if not, load 0 from the stack
|
||||
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
|
||||
Value *index = LOAD(pValid, "valid index");
|
||||
pLastIndex = INT_TO_PTR(pLastIndex, mInt8PtrTy);
|
||||
|
||||
// zero extended index to 32 bits and insert into the correct simd lane
|
||||
index = Z_EXT(index, mInt32Ty);
|
||||
vIndices = VINSERT(vIndices, index, lane);
|
||||
// check if the address is less than the max index,
|
||||
Value* mask = ICMP_ULT(pIndex, pLastIndex);
|
||||
|
||||
// if valid, load the index. if not, load 0 from the stack
|
||||
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
|
||||
Value *index = LOAD(pValid, "valid index", PointerType::get(mInt8Ty, 0), GFX_MEM_CLIENT_FETCH);
|
||||
|
||||
// zero extended index to 32 bits and insert into the correct simd lane
|
||||
index = Z_EXT(index, mInt32Ty);
|
||||
vIndices = VINSERT(vIndices, index, lane);
|
||||
}
|
||||
}
|
||||
|
||||
return vIndices;
|
||||
}
|
||||
|
||||
|
|
@ -1010,30 +1017,36 @@ Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex)
|
|||
/// @param pLastIndex - pointer to last valid index
|
||||
Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
|
||||
{
|
||||
// can fit 2 16 bit integers per vWidth lane
|
||||
Value* vIndices = VUNDEF_I();
|
||||
SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters.");
|
||||
|
||||
// store 0 index on stack to be used to conditionally load from if index address is OOB
|
||||
Value* pZeroIndex = ALLOCA(mInt16Ty);
|
||||
STORE(C((uint16_t)0), pZeroIndex);
|
||||
Value* vIndices = VUNDEF_I();
|
||||
|
||||
// Load a SIMD of index pointers
|
||||
for(int64_t lane = 0; lane < mVWidth; lane++)
|
||||
{
|
||||
// Calculate the address of the requested index
|
||||
Value *pIndex = GEP(pIndices, C(lane));
|
||||
// store 0 index on stack to be used to conditionally load from if index address is OOB
|
||||
Value* pZeroIndex = ALLOCA(mInt16Ty);
|
||||
STORE(C((uint16_t)0), pZeroIndex);
|
||||
|
||||
// check if the address is less than the max index,
|
||||
Value* mask = ICMP_ULT(pIndex, pLastIndex);
|
||||
// Load a SIMD of index pointers
|
||||
for (int64_t lane = 0; lane < mVWidth; lane++)
|
||||
{
|
||||
// Calculate the address of the requested index
|
||||
Value *pIndex = GEP(pIndices, C(lane), mInt16PtrTy);
|
||||
|
||||
// if valid, load the index. if not, load 0 from the stack
|
||||
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
|
||||
Value *index = LOAD(pValid, "valid index", GFX_MEM_CLIENT_FETCH);
|
||||
pLastIndex = INT_TO_PTR(pLastIndex, mInt16PtrTy);
|
||||
|
||||
// zero extended index to 32 bits and insert into the correct simd lane
|
||||
index = Z_EXT(index, mInt32Ty);
|
||||
vIndices = VINSERT(vIndices, index, lane);
|
||||
// check if the address is less than the max index,
|
||||
Value* mask = ICMP_ULT(pIndex, pLastIndex);
|
||||
|
||||
// if valid, load the index. if not, load 0 from the stack
|
||||
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
|
||||
Value *index = LOAD(pValid, "valid index", PointerType::get(mInt16Ty, 0), GFX_MEM_CLIENT_FETCH);
|
||||
|
||||
// zero extended index to 32 bits and insert into the correct simd lane
|
||||
index = Z_EXT(index, mInt32Ty);
|
||||
vIndices = VINSERT(vIndices, index, lane);
|
||||
}
|
||||
}
|
||||
|
||||
return vIndices;
|
||||
}
|
||||
|
||||
|
|
@ -1045,8 +1058,8 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
|
|||
{
|
||||
DataLayout dL(JM()->mpCurrentModule);
|
||||
unsigned int ptrSize = dL.getPointerSize() * 8; // ptr size in bits
|
||||
Value* iLastIndex = PTR_TO_INT(pLastIndex, Type::getIntNTy(JM()->mContext, ptrSize));
|
||||
Value* iIndices = PTR_TO_INT(pIndices, Type::getIntNTy(JM()->mContext, ptrSize));
|
||||
Value* iLastIndex = pLastIndex;
|
||||
Value* iIndices = pIndices;
|
||||
|
||||
// get the number of indices left in the buffer (endPtr - curPtr) / sizeof(index)
|
||||
Value* numIndicesLeft = SUB(iLastIndex,iIndices);
|
||||
|
|
@ -1918,7 +1931,7 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
|
|||
#endif
|
||||
// outputElt * 4 = offsetting by the size of a simdvertex
|
||||
// + c offsets to a 32bit x vWidth row within the current vertex
|
||||
Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), "destGEP");
|
||||
Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), nullptr, "destGEP");
|
||||
STORE(vVertexElements[c], dest);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1375,7 +1375,7 @@ swr_update_derived(struct pipe_context *pipe,
|
|||
|
||||
SWR_INDEX_BUFFER_STATE swrIndexBuffer;
|
||||
swrIndexBuffer.format = swr_convert_index_type(info.index_size);
|
||||
swrIndexBuffer.pIndices = p_data;
|
||||
swrIndexBuffer.xpIndices = (gfxptr_t) p_data;
|
||||
swrIndexBuffer.size = size;
|
||||
|
||||
ctx->api.pfnSwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue