swr/rast: Add api to override draws in flight

Allow draws in flight to be overridden via SWR_CREATECONTEXT_INFO.

Patch by Jan Zielinski.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-10-17 15:02:53 -05:00
parent 2559f2b93e
commit 028ffa5e18
4 changed files with 31 additions and 19 deletions

View file

@ -74,13 +74,19 @@ HANDLE SwrCreateContext(
pContext->privateStateSize = pCreateInfo->privateStateSize;
pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
pContext->MAX_DRAWS_IN_FLIGHT = KNOB_MAX_DRAWS_IN_FLIGHT;
if (pCreateInfo->MAX_DRAWS_IN_FLIGHT != 0)
{
pContext->MAX_DRAWS_IN_FLIGHT = pCreateInfo->MAX_DRAWS_IN_FLIGHT;
}
pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
pContext->dcRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
pContext->dsRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
{
pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
new (&pContext->pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena);
@ -173,7 +179,7 @@ template<bool IsDraw>
void QueueWork(SWR_CONTEXT *pContext)
{
DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
if (IsDraw)
{
@ -257,7 +263,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
}
uint64_t curDraw = pContext->dcRing.GetHead();
uint32_t dcIndex = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT;
uint32_t dcIndex = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
if ((pContext->frameCount - pContext->lastFrameChecked) > 2 ||
(curDraw - pContext->lastDrawChecked) > 0x10000)
@ -273,7 +279,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
pContext->pCurDrawContext = pCurDrawContext;
// Assign next available entry in DS ring to this DC.
uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT;
uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT;
pCurDrawContext->pState = &pContext->dsRing[dsIndex];
// Copy previous state to current state.
@ -361,7 +367,7 @@ void SwrDestroyContext(HANDLE hContext)
DestroyThreadPool(pContext, &pContext->threadPool);
// free the fifos
for (uint32_t i = 0; i < KNOB_MAX_DRAWS_IN_FLIGHT; ++i)
for (uint32_t i = 0; i < pContext->MAX_DRAWS_IN_FLIGHT; ++i)
{
AlignedFree(pContext->dcRing[i].dynState.pStats);
delete pContext->dcRing[i].pArena;
@ -1481,7 +1487,7 @@ void SwrDispatch(
pTaskData->threadGroupCountZ = threadGroupCountZ;
uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ;
uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);

View file

@ -215,6 +215,10 @@ struct SWR_CREATECONTEXT_INFO
// Input (optional): Threading info that overrides any set KNOB values.
SWR_THREADING_INFO* pThreadInfo;
// Input: if set to non-zero value, overrides KNOB value for maximum
// number of draws in flight
uint32_t MAX_DRAWS_IN_FLIGHT;
};
//////////////////////////////////////////////////////////////////////////

View file

@ -481,6 +481,8 @@ struct SWR_CONTEXT
THREAD_POOL threadPool; // Thread pool associated with this context
SWR_THREADING_INFO threadInfo;
uint32_t MAX_DRAWS_IN_FLIGHT;
std::condition_variable FifosNotEmpty;
std::mutex WaitLock;

View file

@ -318,7 +318,7 @@ uint32_t GetEnqueuedDraw(SWR_CONTEXT *pContext)
INLINE
DRAW_CONTEXT *GetDC(SWR_CONTEXT *pContext, uint32_t drawId)
{
return &pContext->dcRing[(drawId-1) % KNOB_MAX_DRAWS_IN_FLIGHT];
return &pContext->dcRing[(drawId-1) % pContext->MAX_DRAWS_IN_FLIGHT];
}
INLINE
@ -421,7 +421,7 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, ui
drawEnqueued = GetEnqueuedDraw(pContext);
while (IDComparesLess(curDrawBE, drawEnqueued))
{
DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];
DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];
// If its not compute and FE is not done then break out of loop.
if (!pDC->doneFE && !pDC->isCompute) break;
@ -478,7 +478,7 @@ bool WorkOnFifoBE(
return false;
}
uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
// Reset our history for locked tiles. We'll have to re-learn which tiles are locked.
lockedTiles.clear();
@ -490,7 +490,7 @@ bool WorkOnFifoBE(
// maintain order. The locked tiles provides the history to ensures this.
for (uint32_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
{
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
if (pDC->isCompute) return false; // We don't look at compute work.
@ -636,7 +636,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
uint32_t drawEnqueued = GetEnqueuedDraw(pContext);
while (IDComparesLess(curDrawFE, drawEnqueued))
{
uint32_t dcSlot = curDrawFE % KNOB_MAX_DRAWS_IN_FLIGHT;
uint32_t dcSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
if (pDC->isCompute || pDC->doneFE)
{
@ -653,7 +653,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
uint32_t curDraw = curDrawFE;
while (IDComparesLess(curDraw, drawEnqueued))
{
uint32_t dcSlot = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT;
uint32_t dcSlot = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
if (!pDC->isCompute && !pDC->FeLock)
@ -694,11 +694,11 @@ void WorkOnCompute(
return;
}
uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
for (uint64_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
{
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
if (pDC->isCompute == false) return;
// check dependencies
@ -972,7 +972,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
}
// Initialize DRAW_CONTEXT's per-thread stats
for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
{
pContext->dcRing[dc].dynState.pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * numThreads, 64);
memset(pContext->dcRing[dc].dynState.pStats, 0, sizeof(SWR_STATS) * numThreads);