mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 15:50:17 +01:00
swr: [rasterizer core] use wrap-around safe compares for dependency checking
Move drawIDs from 64-bit to 32-bit to increase perf. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
dd189536dc
commit
73a9154bde
6 changed files with 45 additions and 36 deletions
|
|
@ -215,13 +215,13 @@ void QueueWork(SWR_CONTEXT *pContext)
|
|||
|
||||
if (IsDraw)
|
||||
{
|
||||
uint64_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
|
||||
uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
|
||||
WorkOnFifoFE(pContext, 0, curDraw[0]);
|
||||
WorkOnFifoBE(pContext, 0, curDraw[1], gSingleThreadLockedTiles, 0, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t curDispatch = pContext->pCurDrawContext->drawId;
|
||||
uint32_t curDispatch = pContext->pCurDrawContext->drawId;
|
||||
WorkOnCompute(pContext, 0, curDispatch);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -381,13 +381,13 @@ struct DRAW_STATE
|
|||
struct DRAW_CONTEXT
|
||||
{
|
||||
SWR_CONTEXT* pContext;
|
||||
uint64_t drawId;
|
||||
uint32_t drawId;
|
||||
uint32_t dependency;
|
||||
union
|
||||
{
|
||||
MacroTileMgr* pTileMgr;
|
||||
DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
|
||||
};
|
||||
uint64_t dependency;
|
||||
DRAW_STATE* pState;
|
||||
CachingArena* pArena;
|
||||
|
||||
|
|
|
|||
|
|
@ -90,13 +90,13 @@ public:
|
|||
return (numEnqueued == mNumEntries);
|
||||
}
|
||||
|
||||
INLINE uint64_t GetTail() volatile { return mRingTail; }
|
||||
INLINE uint64_t GetHead() volatile { return mRingHead; }
|
||||
INLINE uint32_t GetTail() volatile { return mRingTail; }
|
||||
INLINE uint32_t GetHead() volatile { return mRingHead; }
|
||||
|
||||
protected:
|
||||
T* mpRingBuffer;
|
||||
uint32_t mNumEntries;
|
||||
|
||||
OSALIGNLINE(volatile uint64_t) mRingHead; // Consumer Counter
|
||||
OSALIGNLINE(volatile uint64_t) mRingTail; // Producer Counter
|
||||
OSALIGNLINE(volatile uint32_t) mRingHead; // Consumer Counter
|
||||
OSALIGNLINE(volatile uint32_t) mRingTail; // Producer Counter
|
||||
};
|
||||
|
|
|
|||
|
|
@ -294,22 +294,30 @@ void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=
|
|||
}
|
||||
|
||||
// Returns the current head counter of the context's draw-context ring,
// i.e. the value of pContext->dcRing.GetHead().
// Note on this diff view: the uint64_t signature line is the pre-commit form and
// the uint32_t line is its replacement (the commit moves drawIDs to 32-bit).
INLINE
|
||||
uint64_t GetEnqueuedDraw(SWR_CONTEXT *pContext)
|
||||
uint32_t GetEnqueuedDraw(SWR_CONTEXT *pContext)
|
||||
{
|
||||
return pContext->dcRing.GetHead();
|
||||
}
|
||||
|
||||
// Returns a pointer to the draw-context ring entry associated with drawId.
// The slot index is (drawId - 1) % KNOB_MAX_DRAWS_IN_FLIGHT, computed in unsigned
// arithmetic, so drawId == 0 wraps to the maximum value before the modulo is taken.
// NOTE(review): the MAX_DRAWS_IN_FLIGHT knob description in this commit requires the
// value to divide 2^32 evenly — presumably so slot indices stay continuous when the
// 32-bit drawId wraps; confirm against the ring implementation.
// Note on this diff view: the uint64_t signature line is the pre-commit form and
// the uint32_t line is its replacement.
INLINE
|
||||
DRAW_CONTEXT *GetDC(SWR_CONTEXT *pContext, uint64_t drawId)
|
||||
DRAW_CONTEXT *GetDC(SWR_CONTEXT *pContext, uint32_t drawId)
|
||||
{
|
||||
return &pContext->dcRing[(drawId-1) % KNOB_MAX_DRAWS_IN_FLIGHT];
|
||||
}
|
||||
|
||||
// Wrap-around tolerant "a < b" for 32-bit IDs.
// Returns true when the difference (a - b), taken modulo 2^32 and reinterpreted as a
// signed 32-bit value, is negative. The result is only meaningful as an ordering while
// the two IDs are less than 2^31 apart.
INLINE
|
||||
bool IDComparesLess(uint32_t a, uint32_t b)
|
||||
{
|
||||
// Use signed delta to ensure that wrap-around to 0 is correctly handled.
// The subtraction itself is unsigned (well-defined modular arithmetic); only the
// result is converted to int32_t.
|
||||
int32_t delta = int32_t(a - b);
|
||||
return (delta < 0);
|
||||
}
|
||||
|
||||
// Returns true if the dependency of pDC is not yet met, i.e. pDC->dependency is
// still ahead of lastRetiredDraw.
// pContext is not referenced in the body.
// Note on this diff view: the uint64_t signature and the plain '>' return are the
// pre-commit lines; the uint32_t signature and the IDComparesLess() return replace
// them so the comparison goes through the wrap-around tolerant helper.
|
||||
INLINE
|
||||
bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint64_t lastRetiredDraw)
|
||||
bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRetiredDraw)
|
||||
{
|
||||
return (pDC->dependency > lastRetiredDraw);
|
||||
return IDComparesLess(lastRetiredDraw, pDC->dependency);
|
||||
}
|
||||
|
||||
// inlined-only version
|
||||
|
|
@ -345,11 +353,11 @@ int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
|||
return CompleteDrawContextInl(pContext, pDC);
|
||||
}
|
||||
|
||||
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE, uint64_t& drawEnqueued)
|
||||
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE, uint32_t& drawEnqueued)
|
||||
{
|
||||
// increment our current draw id to the first incomplete draw
|
||||
drawEnqueued = GetEnqueuedDraw(pContext);
|
||||
while (curDrawBE < drawEnqueued)
|
||||
while (IDComparesLess(curDrawBE, drawEnqueued))
|
||||
{
|
||||
DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];
|
||||
|
||||
|
|
@ -372,7 +380,7 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE,
|
|||
}
|
||||
|
||||
// If there are no more incomplete draws then return false.
|
||||
return (curDrawBE >= drawEnqueued) ? false : true;
|
||||
return IDComparesLess(curDrawBE, drawEnqueued);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -392,20 +400,20 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE,
|
|||
void WorkOnFifoBE(
|
||||
SWR_CONTEXT *pContext,
|
||||
uint32_t workerId,
|
||||
uint64_t &curDrawBE,
|
||||
uint32_t &curDrawBE,
|
||||
TileSet& lockedTiles,
|
||||
uint32_t numaNode,
|
||||
uint32_t numaMask)
|
||||
{
|
||||
// Find the first incomplete draw that has pending work. If no such draw is found then
|
||||
// return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
|
||||
uint64_t drawEnqueued = 0;
|
||||
uint32_t drawEnqueued = 0;
|
||||
if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
|
||||
uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
|
||||
|
||||
// Reset our history for locked tiles. We'll have to re-learn which tiles are locked.
|
||||
lockedTiles.clear();
|
||||
|
|
@ -415,7 +423,7 @@ void WorkOnFifoBE(
|
|||
// 2. If we're trying to work on draws after curDrawBE, we are restricted to
|
||||
// working on those macrotiles that are known to be complete in the prior draw to
|
||||
// maintain order. The locked tiles provides the history to ensures this.
|
||||
for (uint64_t i = curDrawBE; i < drawEnqueued; ++i)
|
||||
for (uint32_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
|
||||
{
|
||||
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
|
||||
|
||||
|
|
@ -508,11 +516,11 @@ void WorkOnFifoBE(
|
|||
}
|
||||
}
|
||||
|
||||
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE)
|
||||
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
|
||||
{
|
||||
// Try to grab the next DC from the ring
|
||||
uint64_t drawEnqueued = GetEnqueuedDraw(pContext);
|
||||
while (curDrawFE < drawEnqueued)
|
||||
uint32_t drawEnqueued = GetEnqueuedDraw(pContext);
|
||||
while (IDComparesLess(curDrawFE, drawEnqueued))
|
||||
{
|
||||
uint32_t dcSlot = curDrawFE % KNOB_MAX_DRAWS_IN_FLIGHT;
|
||||
DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
|
||||
|
|
@ -527,8 +535,8 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE)
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t curDraw = curDrawFE;
|
||||
while (curDraw < drawEnqueued)
|
||||
uint32_t curDraw = curDrawFE;
|
||||
while (IDComparesLess(curDraw, drawEnqueued))
|
||||
{
|
||||
uint32_t dcSlot = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT;
|
||||
DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
|
||||
|
|
@ -559,17 +567,17 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE)
|
|||
void WorkOnCompute(
|
||||
SWR_CONTEXT *pContext,
|
||||
uint32_t workerId,
|
||||
uint64_t& curDrawBE)
|
||||
uint32_t& curDrawBE)
|
||||
{
|
||||
uint64_t drawEnqueued = 0;
|
||||
uint32_t drawEnqueued = 0;
|
||||
if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
|
||||
uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
|
||||
|
||||
for (uint64_t i = curDrawBE; curDrawBE < drawEnqueued; ++i)
|
||||
for (uint64_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
|
||||
{
|
||||
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
|
||||
if (pDC->isCompute == false) return;
|
||||
|
|
@ -639,10 +647,10 @@ DWORD workerThreadMain(LPVOID pData)
|
|||
// the worker can safely increment its oldestDraw counter and move on to the next draw.
|
||||
std::unique_lock<std::mutex> lock(pContext->WaitLock, std::defer_lock);
|
||||
|
||||
auto threadHasWork = [&](uint64_t curDraw) { return curDraw != pContext->dcRing.GetHead(); };
|
||||
auto threadHasWork = [&](uint32_t curDraw) { return curDraw != pContext->dcRing.GetHead(); };
|
||||
|
||||
uint64_t curDrawBE = 0;
|
||||
uint64_t curDrawFE = 0;
|
||||
uint32_t curDrawBE = 0;
|
||||
uint32_t curDrawFE = 0;
|
||||
|
||||
while (pContext->threadPool.inThreadShutdown == false)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
|
|||
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
|
||||
|
||||
// Expose FE and BE worker functions to the API thread if single threaded
|
||||
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE);
|
||||
void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles, uint32_t numaNode, uint32_t numaMask);
|
||||
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE);
|
||||
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE);
|
||||
void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE, TileSet &usedTiles, uint32_t numaNode, uint32_t numaMask);
|
||||
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE);
|
||||
int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC);
|
||||
|
|
@ -133,8 +133,9 @@ KNOBS = [
|
|||
|
||||
['MAX_DRAWS_IN_FLIGHT', {
|
||||
'type' : 'uint32_t',
|
||||
'default' : '96',
|
||||
'desc' : ['Maximum number of draws outstanding before API thread blocks.'],
|
||||
'default' : '128',
|
||||
'desc' : ['Maximum number of draws outstanding before API thread blocks.',
|
||||
'This value MUST be evenly divisible into 2^32'],
|
||||
'category' : 'perf',
|
||||
}],
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue