swr: [rasterizer] Discard work + misc fixes

This commit is contained in:
Tim Rowley 2016-03-10 18:30:40 -06:00
parent 542d7dec7b
commit e374d2d24b
9 changed files with 119 additions and 28 deletions

View file

@ -1265,7 +1265,10 @@ void SwrDrawIndexedInstanced(
DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
}
// Attach surfaces to pipeline
//////////////////////////////////////////////////////////////////////////
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - Mask specifying which surfaces attached to the hot tiles should be invalidated.
void SwrInvalidateTiles(
HANDLE hContext,
uint32_t attachmentMask)
@ -1273,10 +1276,39 @@ void SwrInvalidateTiles(
SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext;
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
pDC->FeWork.type = DISCARDINVALIDATETILES;
pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
memset(&pDC->FeWork.desc.discardInvalidateTiles.rect, 0, sizeof(SWR_RECT));
pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
//enqueue
QueueDraw(pContext);
}
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDiscardRect
/// @param hContext - Handle passed back from SwrCreateContext
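/// @param attachmentMask - Mask specifying which surfaces attached to the hot tiles should be discarded.
/// @param rect - Region to discard; only macrotiles lying entirely inside it are affected. If rect is all zeros, the region defaults to the viewport 0 bounds (intended to cover the entire attachment surface).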
void SwrDiscardRect(
HANDLE hContext,
uint32_t attachmentMask,
SWR_RECT rect)
{
SWR_CONTEXT *pContext = (SWR_CONTEXT*)hContext;
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
// Queue a load to the hottile
pDC->FeWork.type = INVALIDATETILES;
pDC->FeWork.pfnWork = ProcessInvalidateTiles;
pDC->FeWork.desc.invalidateTiles.attachmentMask = attachmentMask;
pDC->FeWork.type = DISCARDINVALIDATETILES;
pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
//enqueue
QueueDraw(pContext);

View file

@ -408,6 +408,16 @@ void SWR_API SwrInvalidateTiles(
HANDLE hContext,
uint32_t attachmentMask);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDiscardRect
/// @param hContext - Handle passed back from SwrCreateContext
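/// @param attachmentMask - Mask specifying which surfaces attached to the hot tiles should be discarded.
/// @param rect - Region to discard; only macrotiles lying entirely inside it are affected. If rect is all zeros, the region defaults to the viewport 0 bounds (intended to cover the entire attachment surface).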
void SWR_API SwrDiscardRect(
HANDLE hContext,
uint32_t attachmentMask,
SWR_RECT rect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDispatch
/// @param hContext - Handle passed back from SwrCreateContext

View file

@ -399,20 +399,22 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
}
void ProcessInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
{
INVALIDATE_TILES_DESC *pDesc = (INVALIDATE_TILES_DESC*)pData;
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData;
SWR_CONTEXT *pContext = pDC->pContext;
const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i)
{
if (pDesc->attachmentMask & (1 << i))
{
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i);
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(
pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i, pDesc->createNewTiles, numSamples);
if (pHotTile)
{
SWR_ASSERT(pHotTile->state == HOTTILE_INVALID || pHotTile->state == HOTTILE_RESOLVED);
pHotTile->state = HOTTILE_INVALID;
pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
}
}
}

View file

@ -37,7 +37,7 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
void ProcessQueryStatsBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void ProcessInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers);
void InitClearTilesTable();
simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar vI, simdscalar vJ);

View file

@ -114,9 +114,13 @@ struct CLEAR_DESC
uint8_t clearStencil;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Describes a discard/invalidate request over hot tiles. The same
///        descriptor is stored in FE_WORK::desc and copied into
///        BE_WORK::desc for each macrotile the front end enqueues.
struct INVALIDATE_TILES_DESC
struct DISCARD_INVALIDATE_TILES_DESC
{
// Bit i selects attachment i; the backend handler tests it as (1 << i)
// for every i below SWR_NUM_ATTACHMENTS.
uint32_t attachmentMask;
// Region to process. When every field is zero the front end substitutes
// the bounds of viewport 0.
SWR_RECT rect;
// State written to each affected hot tile (cast to HOTTILE_STATE by the
// backend handler).
SWR_TILE_STATE newTileState;
// Forwarded as the 'create' argument of HotTileMgr::GetHotTileNoLoad:
// when true a hot tile without a buffer gets one allocated, otherwise
// such a tile is skipped.
bool createNewTiles;
// When true only macrotiles lying entirely inside rect are enqueued;
// when false macrotiles that rect merely overlaps are included as well.
bool fullTilesOnly;
};
struct SYNC_DESC
@ -152,7 +156,7 @@ enum WORK_TYPE
SYNC,
DRAW,
CLEAR,
INVALIDATETILES,
DISCARDINVALIDATETILES,
STORETILES,
QUERYSTATS,
};
@ -166,7 +170,7 @@ struct BE_WORK
SYNC_DESC sync;
TRIANGLE_WORK_DESC tri;
CLEAR_DESC clear;
INVALIDATE_TILES_DESC invalidateTiles;
DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
STORE_TILES_DESC storeTiles;
QUERY_DESC queryStats;
} desc;
@ -203,7 +207,7 @@ struct FE_WORK
SYNC_DESC sync;
DRAW_WORK draw;
CLEAR_DESC clear;
INVALIDATE_TILES_DESC invalidateTiles;
DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
STORE_TILES_DESC storeTiles;
QUERY_DESC queryStats;
} desc;

View file

@ -193,35 +193,65 @@ void ProcessStoreTiles(
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pUserData - Pointer to user data passed back to callback.
/// @todo This should go away when we switch this to use compute threading.
void ProcessInvalidateTiles(
void ProcessDiscardInvalidateTiles(
SWR_CONTEXT *pContext,
DRAW_CONTEXT *pDC,
uint32_t workerId,
void *pUserData)
{
RDTSC_START(FEProcessInvalidateTiles);
INVALIDATE_TILES_DESC *pInv = (INVALIDATE_TILES_DESC*)pUserData;
DISCARD_INVALIDATE_TILES_DESC *pInv = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
const API_STATE& state = GetApiState(pDC);
SWR_RECT rect;
if (pInv->rect.top | pInv->rect.bottom | pInv->rect.right | pInv->rect.left)
{
// Valid rect
rect = pInv->rect;
}
else
{
// Use viewport dimensions
const API_STATE& state = GetApiState(pDC);
rect.left = (uint32_t)state.vp[0].x;
rect.right = (uint32_t)(state.vp[0].x + state.vp[0].width);
rect.top = (uint32_t)state.vp[0].y;
rect.bottom = (uint32_t)(state.vp[0].y + state.vp[0].height);
}
// queue discard/invalidate work to each affected macro tile
// compute macro tile bounds for the current render target
uint32_t macroWidth = KNOB_MACROTILE_X_DIM;
uint32_t macroHeight = KNOB_MACROTILE_Y_DIM;
uint32_t numMacroTilesX = ((uint32_t)state.vp[0].width + (uint32_t)state.vp[0].x + (macroWidth - 1)) / macroWidth;
uint32_t numMacroTilesY = ((uint32_t)state.vp[0].height + (uint32_t)state.vp[0].y + (macroHeight - 1)) / macroHeight;
// Setup region assuming full tiles
uint32_t macroTileStartX = (rect.left + (macroWidth - 1)) / macroWidth;
uint32_t macroTileStartY = (rect.top + (macroHeight - 1)) / macroHeight;
uint32_t macroTileEndX = rect.right / macroWidth;
uint32_t macroTileEndY = rect.bottom / macroHeight;
if (pInv->fullTilesOnly == false)
{
// include partial tiles
macroTileStartX = rect.left / macroWidth;
macroTileStartY = rect.top / macroHeight;
macroTileEndX = (rect.right + macroWidth - 1) / macroWidth;
macroTileEndY = (rect.bottom + macroHeight - 1) / macroHeight;
}
// build the backend work item that is enqueued to every selected macrotile
BE_WORK work;
work.type = INVALIDATETILES;
work.pfnWork = ProcessInvalidateTilesBE;
work.desc.invalidateTiles = *pInv;
work.type = DISCARDINVALIDATETILES;
work.pfnWork = ProcessDiscardInvalidateTilesBE;
work.desc.discardInvalidateTiles = *pInv;
for (uint32_t x = 0; x < numMacroTilesX; ++x)
for (uint32_t x = macroTileStartX; x < macroTileEndX; ++x)
{
for (uint32_t y = 0; y < numMacroTilesY; ++y)
for (uint32_t y = macroTileStartY; y < macroTileEndY; ++y)
{
pTileMgr->enqueue(x, y, &work);
}

View file

@ -316,7 +316,7 @@ void ProcessDraw(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, vo
void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);

View file

@ -186,7 +186,9 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32
return &tile.Attachment[attachment];
}
HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment)
HOTTILE* HotTileMgr::GetHotTileNoLoad(
SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID,
SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples)
{
uint32_t x, y;
MacroTileMgr::getTileIndices(macroID, x, y);
@ -198,7 +200,18 @@ HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC,
HOTTILE& hotTile = tile.Attachment[attachment];
if (hotTile.pBuffer == NULL)
{
return NULL;
if (create)
{
uint32_t size = numSamples * mHotTileSize[attachment];
hotTile.pBuffer = (uint8_t*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
hotTile.state = HOTTILE_INVALID;
hotTile.numSamples = numSamples;
hotTile.renderTargetArrayIndex = 0;
}
else
{
return NULL;
}
}
return &hotTile;

View file

@ -297,7 +297,7 @@ public:
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
uint32_t renderTargetArrayIndex = 0);
HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment);
HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
static void ClearColorHotTile(const HOTTILE* pHotTile);
static void ClearDepthHotTile(const HOTTILE* pHotTile);