rasterizer/swr: move BucketMgr to SwrContext

This move gets us back to parity with the global manager
in that we can dump render context buckets now.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Jan Zielinski 2019-07-24 12:03:49 +02:00
parent cda4c62893
commit c5c05979f7
21 changed files with 290 additions and 261 deletions

View file

@ -48,7 +48,17 @@ extern THREAD UINT tlsThreadId;
class BucketManager
{
public:
BucketManager() {}
uint32_t mCurrentFrame;
std::vector<uint32_t> mBucketMap;
bool mBucketsInitialized;
std::string mBucketMgrName;
BucketManager(std::string name) : mCurrentFrame(0), mBucketsInitialized(false), mBucketMgrName(name)
{
mBucketMap.clear();
}
~BucketManager();
// removes all registered thread data

View file

@ -65,9 +65,6 @@ void WakeAllThreads(SWR_CONTEXT* pContext)
/// @param pCreateInfo - pointer to creation info.
HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
{
RDTSC_RESET();
RDTSC_INIT(0);
void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4);
memset(pContextMem, 0, sizeof(SWR_CONTEXT));
SWR_CONTEXT* pContext = new (pContextMem) SWR_CONTEXT();
@ -157,6 +154,12 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
#endif
#if defined(KNOB_ENABLE_RDTSC)
pContext->pBucketMgr = new BucketManager(pCreateInfo->contextName);
RDTSC_RESET(pContext->pBucketMgr);
RDTSC_INIT(pContext->pBucketMgr, 0);
#endif
// Allocate scratch space for workers.
///@note We could lazily allocate this but its rather small amount of memory.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
@ -205,7 +208,7 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
// pass pointer to bucket manager back to caller
#ifdef KNOB_ENABLE_RDTSC
pCreateInfo->pBucketMgr = &gBucketMgr;
pCreateInfo->pBucketMgr = pContext->pBucketMgr;
#endif
pCreateInfo->contextSaveSize = sizeof(API_STATE);
@ -277,9 +280,9 @@ void QueueWork(SWR_CONTEXT* pContext)
}
else
{
RDTSC_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, APIDrawWakeAllThreads, pDC->drawId);
WakeAllThreads(pContext);
RDTSC_END(APIDrawWakeAllThreads, 1);
RDTSC_END(pContext->pBucketMgr, APIDrawWakeAllThreads, 1);
}
// Set current draw context to NULL so that next state call forces a new draw context to be
@ -300,7 +303,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext)
DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false)
{
RDTSC_BEGIN(APIGetDrawContext, 0);
RDTSC_BEGIN(pContext->pBucketMgr, APIGetDrawContext, 0);
// If current draw context is null then need to obtain a new draw context to use from ring.
if (pContext->pCurDrawContext == nullptr)
{
@ -389,7 +392,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false)
SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
}
RDTSC_END(APIGetDrawContext, 0);
RDTSC_END(pContext->pBucketMgr, APIGetDrawContext, 0);
return pContext->pCurDrawContext;
}
@ -441,6 +444,10 @@ void SwrDestroyContext(HANDLE hContext)
#endif
}
#if defined(KNOB_ENABLE_RDTSC)
delete pContext->pBucketMgr;
#endif
delete[] pContext->ppScratch;
AlignedFree(pContext->pStats);
@ -498,7 +505,7 @@ void SWR_API SwrSync(HANDLE hContext,
SWR_CONTEXT* pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APISync, 0);
RDTSC_BEGIN(pContext->pBucketMgr, APISync, 0);
pDC->FeWork.type = SYNC;
pDC->FeWork.pfnWork = ProcessSync;
@ -514,7 +521,7 @@ void SWR_API SwrSync(HANDLE hContext,
// enqueue
QueueDraw(pContext);
RDTSC_END(APISync, 1);
RDTSC_END(pContext->pBucketMgr, APISync, 1);
}
void SwrStallBE(HANDLE hContext)
@ -529,28 +536,28 @@ void SwrWaitForIdle(HANDLE hContext)
{
SWR_CONTEXT* pContext = GetContext(hContext);
RDTSC_BEGIN(APIWaitForIdle, 0);
RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0);
while (!pContext->dcRing.IsEmpty())
{
_mm_pause();
}
RDTSC_END(APIWaitForIdle, 1);
RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1);
}
// Spins until pContext->drawsOutstandingFE is no longer greater than zero,
// issuing _mm_pause() on every iteration of the wait loop.
// The wait is bracketed by RDTSC_BEGIN/RDTSC_END under the APIWaitForIdle bucket.
// NOTE(review): each RDTSC marker appears twice below, once without and once
// with a pContext->pBucketMgr argument — presumably the removed and added
// lines of this diff hunk rather than two real calls; confirm against the file.
void SwrWaitForIdleFE(HANDLE hContext)
{
SWR_CONTEXT* pContext = GetContext(hContext);
RDTSC_BEGIN(APIWaitForIdle, 0);
RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0);
while (pContext->drawsOutstandingFE > 0)
{
_mm_pause();
}
RDTSC_END(APIWaitForIdle, 1);
RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1);
}
void SwrSetVertexBuffers(HANDLE hContext,
@ -1172,7 +1179,7 @@ void DrawInstanced(HANDLE hContext,
SWR_CONTEXT* pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIDraw, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, APIDraw, pDC->drawId);
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
@ -1243,7 +1250,7 @@ void DrawInstanced(HANDLE hContext,
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
RDTSC_END(APIDraw, numVertices * numInstances);
RDTSC_END(pContext->pBucketMgr, APIDraw, numVertices * numInstances);
}
//////////////////////////////////////////////////////////////////////////
@ -1307,7 +1314,7 @@ void DrawIndexedInstance(HANDLE hContext,
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
API_STATE* pState = &pDC->pState->state;
RDTSC_BEGIN(APIDrawIndexed, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, APIDrawIndexed, pDC->drawId);
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
@ -1403,7 +1410,7 @@ void DrawIndexedInstance(HANDLE hContext,
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
RDTSC_END(APIDrawIndexed, numIndices * numInstances);
RDTSC_END(pContext->pBucketMgr, APIDrawIndexed, numIndices * numInstances);
}
//////////////////////////////////////////////////////////////////////////
@ -1529,7 +1536,7 @@ void SwrDispatch(HANDLE hContext,
SWR_CONTEXT* pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIDispatch, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, APIDispatch, pDC->drawId);
AR_API_EVENT(
DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
pDC->isCompute = true; // This is a compute context.
@ -1546,7 +1553,9 @@ void SwrDispatch(HANDLE hContext,
pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);
QueueDispatch(pContext);
RDTSC_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
RDTSC_END(pContext->pBucketMgr,
APIDispatch,
threadGroupCountX * threadGroupCountY * threadGroupCountZ);
}
// Deswizzles, converts and stores current contents of the hot tiles to surface
@ -1564,7 +1573,7 @@ void SWR_API SwrStoreTiles(HANDLE hContext,
SWR_CONTEXT* pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIStoreTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, APIStoreTiles, pDC->drawId);
pDC->FeWork.type = STORETILES;
pDC->FeWork.pfnWork = ProcessStoreTiles;
@ -1578,7 +1587,7 @@ void SWR_API SwrStoreTiles(HANDLE hContext,
AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId));
RDTSC_END(APIStoreTiles, 1);
RDTSC_END(pContext->pBucketMgr, APIStoreTiles, 1);
}
//////////////////////////////////////////////////////////////////////////
@ -1606,7 +1615,7 @@ void SWR_API SwrClearRenderTarget(HANDLE hContext,
SWR_CONTEXT* pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, APIClearRenderTarget, pDC->drawId);
pDC->FeWork.type = CLEAR;
pDC->FeWork.pfnWork = ProcessClear;
@ -1624,7 +1633,7 @@ void SWR_API SwrClearRenderTarget(HANDLE hContext,
// enqueue draw
QueueDraw(pContext);
RDTSC_END(APIClearRenderTarget, 1);
RDTSC_END(pContext->pBucketMgr, APIClearRenderTarget, 1);
}
//////////////////////////////////////////////////////////////////////////
@ -1697,7 +1706,7 @@ void SWR_API SwrEndFrame(HANDLE hContext)
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
(void)pDC; // var used
RDTSC_ENDFRAME();
RDTSC_ENDFRAME(pContext->pBucketMgr);
AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId));
pContext->frameCount++;

View file

@ -277,6 +277,8 @@ struct SWR_CREATECONTEXT_INFO
// Input: if set to non-zero value, overrides KNOB value for maximum
// number of draws in flight
uint32_t MAX_DRAWS_IN_FLIGHT;
std::string contextName;
};
//////////////////////////////////////////////////////////////////////////

View file

@ -52,7 +52,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC,
{
SWR_CONTEXT* pContext = pDC->pContext;
RDTSC_BEGIN(BEDispatch, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEDispatch, pDC->drawId);
const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
SWR_ASSERT(pTaskData != nullptr);
@ -90,7 +90,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC,
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
AR_EVENT(CSStats((HANDLE)&csContext.stats));
RDTSC_END(BEDispatch, 1);
RDTSC_END(pDC->pContext->pBucketMgr, BEDispatch, 1);
}
//////////////////////////////////////////////////////////////////////////
@ -119,7 +119,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT* pDC,
SWR_CONTEXT* pContext = pDC->pContext;
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStoreTiles, pDC->drawId);
SWR_FORMAT srcFormat;
switch (attachment)
@ -194,7 +194,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT* pDC,
}
}
}
RDTSC_END(BEStoreTiles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, BEStoreTiles, 1);
}
void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
@ -247,9 +247,9 @@ void BackendNullPS(DRAW_CONTEXT* pDC,
SWR_TRIANGLE_DESC& work,
RenderOutputBuffers& renderBuffers)
{
RDTSC_BEGIN(BENullBackend, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BENullBackend, pDC->drawId);
///@todo: handle center multisample pattern
RDTSC_BEGIN(BESetup, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
@ -262,7 +262,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC,
SWR_PS_CONTEXT psContext;
// skip SetupPixelShaderContext(&psContext, ...); // not needed here
RDTSC_END(BESetup, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
@ -305,7 +305,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC,
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
@ -321,7 +321,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC,
psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_END(BEBarycentric, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
@ -335,7 +335,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC,
simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
simdscalar stencilPassMask = vCoverageMask;
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
simdscalar depthPassMask = DepthStencilTest(&state,
work.triFlags.frontFacing,
work.triFlags.viewportIndex,
@ -356,7 +356,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC,
vCoverageMask,
pStencilSample,
stencilPassMask);
RDTSC_END(BEEarlyDepthTest, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
@ -378,7 +378,7 @@ void BackendNullPS(DRAW_CONTEXT* pDC,
vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy);
}
RDTSC_END(BENullBackend, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BENullBackend, 0);
}
PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {};

View file

@ -168,7 +168,7 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo
SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
RDTSC_BEGIN(BEClear, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
{
@ -226,13 +226,13 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo
pHotTile->state = HOTTILE_CLEAR;
}
RDTSC_END(BEClear, 1);
RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
}
else
{
// Legacy clear
CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
RDTSC_BEGIN(BEClear, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
{
@ -292,7 +292,7 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo
pClear->rect);
}
RDTSC_END(BEClear, 1);
RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
}
}

View file

@ -755,7 +755,7 @@ struct PixelRateZTestLoop
_simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
RDTSC_BEGIN(psContext.pBucketManager, BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
@ -778,7 +778,7 @@ struct PixelRateZTestLoop
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
}
RDTSC_END(BEBarycentric, 0);
RDTSC_END(psContext.pBucketManager, BEBarycentric, 0);
///@todo: perspective correct vs non-perspective correct clipping?
// if clip distances are enabled, we need to interpolate for each sample
@ -795,7 +795,7 @@ struct PixelRateZTestLoop
// ZTest for this sample
///@todo Need to uncomment out this bucket.
// RDTSC_BEGIN(BEDepthBucket, pDC->drawId);
// RDTSC_BEGIN(psContext.pBucketManager, BEDepthBucket, pDC->drawId);
depthPassMask[sample] = vCoverageMask[sample];
stencilPassMask[sample] = vCoverageMask[sample];
depthPassMask[sample] = DepthStencilTest(&state,
@ -806,7 +806,7 @@ struct PixelRateZTestLoop
vCoverageMask[sample],
pStencilSample,
&stencilPassMask[sample]);
// RDTSC_END(BEDepthBucket, 0);
// RDTSC_END(psContext.pBucketManager, BEDepthBucket, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if (psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
@ -1007,8 +1007,8 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
/// backend
RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelRateBackend, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
@ -1029,7 +1029,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
state.colorHottileEnable,
renderBuffers);
RDTSC_END(BESetup, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
PixelRateZTestLoop<T> PixelRateZTest(pDC,
workerId,
@ -1075,14 +1075,14 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(
&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
RDTSC_END(BEBarycentric, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
if (T::bForcedSampleCount)
{
@ -1109,12 +1109,12 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
if (state.psState.usesSourceDepth)
{
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
// interpolate and quantize z
psContext.vZ = vplaneps(
coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_END(BEBarycentric, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
}
// pixels that are currently active
@ -1122,10 +1122,10 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
psContext.oMask = T::MultisampleT::FullSampleMask();
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
RDTSC_END(BEPixelShader, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
// update stats
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
@ -1159,7 +1159,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
for (uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount);
sample++)
{
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests
// all samples
uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
@ -1175,7 +1175,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
if (!_simd_movemask_ps(depthMask))
{
// stencil should already have been written in early/lateZ tests
RDTSC_END(BEOutputMerger, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
continue;
}
}
@ -1210,10 +1210,10 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
pStencilSample,
PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
RDTSC_END(BEOutputMerger, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
}
Endtile:
RDTSC_BEGIN(BEEndTile, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
@ -1242,7 +1242,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
pStencilBuffer +=
(KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
RDTSC_END(BEEndTile, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
@ -1252,7 +1252,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
RDTSC_END(BEPixelRateBackend, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelRateBackend, 0);
}
template <uint32_t sampleCountT = SWR_MULTISAMPLE_1X,

View file

@ -45,8 +45,8 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
SWR_TRIANGLE_DESC& work,
RenderOutputBuffers& renderBuffers)
{
RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESampleRateBackend, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
const API_STATE& state = GetApiState(pDC);
@ -65,7 +65,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
state.colorHottileEnable,
renderBuffers);
RDTSC_END(BESetup, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
@ -95,14 +95,14 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(
&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
RDTSC_END(BEBarycentric, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
{
@ -128,7 +128,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
@ -144,7 +144,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_END(BEBarycentric, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
@ -162,7 +162,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
// Early-Z?
if (T::bCanEarlyZ)
{
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state,
work.triFlags.frontFacing,
work.triFlags.viewportIndex,
@ -174,7 +174,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
_simd_movemask_ps(stencilPassMask),
_simd_movemask_ps(vCoverageMask)));
RDTSC_END(BEEarlyDepthTest, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
// early-exit if no samples passed depth or earlyZ is forced on.
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
@ -201,9 +201,9 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
RDTSC_END(BEPixelShader, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
// update stats
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
@ -214,7 +214,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
// late-Z
if (!T::bCanEarlyZ)
{
RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state,
work.triFlags.frontFacing,
work.triFlags.viewportIndex,
@ -226,7 +226,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
_simd_movemask_ps(stencilPassMask),
_simd_movemask_ps(vCoverageMask)));
RDTSC_END(BELateDepthTest, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
@ -251,7 +251,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
UPDATE_STAT_BE(DepthPassCount, statCount);
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
OutputMerger8x2(pDC,
psContext,
@ -278,7 +278,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
pStencilSample,
stencilPassMask);
}
RDTSC_END(BEOutputMerger, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
}
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
@ -286,7 +286,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
Endtile:
ATTR_UNUSED;
RDTSC_BEGIN(BEEndTile, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
@ -309,7 +309,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
pStencilBuffer +=
(KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
RDTSC_END(BEEndTile, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
@ -319,7 +319,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
RDTSC_END(BESampleRateBackend, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BESampleRateBackend, 0);
}
// Recursive template used to auto-nest conditionals. Converts dynamic enum function

View file

@ -45,8 +45,8 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
SWR_TRIANGLE_DESC& work,
RenderOutputBuffers& renderBuffers)
{
RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESingleSampleBackend, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
@ -66,7 +66,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
state.colorHottileEnable,
renderBuffers);
RDTSC_END(BESetup, 1);
RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1);
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
@ -114,7 +114,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
@ -126,7 +126,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_END(BEBarycentric, 1);
RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 1);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
@ -144,7 +144,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
// Early-Z?
if (T::bCanEarlyZ)
{
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state,
work.triFlags.frontFacing,
work.triFlags.viewportIndex,
@ -156,7 +156,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
_simd_movemask_ps(stencilPassMask),
_simd_movemask_ps(vCoverageMask)));
RDTSC_END(BEEarlyDepthTest, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
@ -182,9 +182,9 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
RDTSC_END(BEPixelShader, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
// update stats
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
@ -195,7 +195,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
// late-Z
if (!T::bCanEarlyZ)
{
RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state,
work.triFlags.frontFacing,
work.triFlags.viewportIndex,
@ -207,7 +207,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
_simd_movemask_ps(stencilPassMask),
_simd_movemask_ps(vCoverageMask)));
RDTSC_END(BELateDepthTest, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
@ -236,7 +236,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
UPDATE_STAT_BE(DepthPassCount, statCount);
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
OutputMerger8x2(pDC,
psContext,
@ -263,11 +263,11 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
pStencilBuffer,
stencilPassMask);
}
RDTSC_END(BEOutputMerger, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
}
Endtile:
RDTSC_BEGIN(BEEndTile, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
@ -291,7 +291,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
pStencilBuffer +=
(KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
RDTSC_END(BEEndTile, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
@ -301,7 +301,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
RDTSC_END(BESingleSampleBackend, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0);
}
// Recursive template used to auto-nest conditionals. Converts dynamic enum function

View file

@ -373,7 +373,10 @@ uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox,
Integer<SIMD_T> vNegB1 = SIMD_T::mullo_epi32(vBi[1], SIMD_T::set1_epi32(-1));
Integer<SIMD_T> vNegB2 = SIMD_T::mullo_epi32(vBi[2], SIMD_T::set1_epi32(-1));
RDTSC_EVENT(FEEarlyRastEnter, _mm_popcnt_u32(oneTileMask & triMask), 0);
RDTSC_EVENT(pDC->pContext->pBucketMgr,
FEEarlyRastEnter,
_mm_popcnt_u32(oneTileMask & triMask),
0);
Integer<SIMD_T> vShiftCntrl = EarlyRastHelper<SIMD_T>::InitShiftCntrl();
Integer<SIMD_T> vCwTris = SIMD_T::set1_epi32(cwTrisMask);
@ -639,7 +642,10 @@ uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox,
if (triMask ^ oldTriMask)
{
RDTSC_EVENT(FEEarlyRastExit, _mm_popcnt_u32(triMask & oneTileMask), 0);
RDTSC_EVENT(pDC->pContext->pBucketMgr,
FEEarlyRastExit,
_mm_popcnt_u32(triMask & oneTileMask),
0);
}
return triMask;
}
@ -668,7 +674,7 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC,
{
const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
RDTSC_BEGIN(FEBinTriangles, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinTriangles, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@ -806,7 +812,10 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC,
if (origTriMask ^ triMask)
{
RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
RDTSC_EVENT(pDC->pContext->pBucketMgr,
FECullZeroAreaAndBackface,
_mm_popcnt_u32(origTriMask ^ triMask),
0);
}
AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask));
@ -917,7 +926,10 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC,
if (origTriMask ^ triMask)
{
RDTSC_EVENT(FECullBetweenCenters, _mm_popcnt_u32(origTriMask ^ triMask), 0);
RDTSC_EVENT(pDC->pContext->pBucketMgr,
FECullBetweenCenters,
_mm_popcnt_u32(origTriMask ^ triMask),
0);
}
}
@ -1017,7 +1029,7 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC,
if (!triMask)
{
RDTSC_END(FEBinTriangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
return;
}
}
@ -1029,7 +1041,7 @@ endBinTriangles:
if (!triMask)
{
RDTSC_END(FEBinTriangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
return;
}
@ -1065,7 +1077,7 @@ endBinTriangles:
BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
RDTSC_END(FEBinTriangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
return;
}
else if (rastState.fillMode == SWR_FILLMODE_POINT)
@ -1078,7 +1090,7 @@ endBinTriangles:
BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
RDTSC_END(FEBinTriangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
return;
}
@ -1194,7 +1206,7 @@ endBinTriangles:
triMask &= ~(1 << triIndex);
}
RDTSC_END(FEBinTriangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
}
template <typename CT>
@ -1274,7 +1286,7 @@ void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC,
Integer<SIMD_T> const& viewportIdx,
Integer<SIMD_T> const& rtIdx)
{
RDTSC_BEGIN(FEBinPoints, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinPoints, pDC->drawId);
Vec4<SIMD_T>& primVerts = prim[0];
@ -1572,7 +1584,7 @@ void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC,
}
}
RDTSC_END(FEBinPoints, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEBinPoints, 1);
}
//////////////////////////////////////////////////////////////////////////
@ -1674,7 +1686,7 @@ void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC,
{
const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
RDTSC_BEGIN(FEBinLines, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinLines, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@ -1867,7 +1879,7 @@ void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC,
endBinLines:
RDTSC_END(FEBinLines, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEBinLines, 1);
}
//////////////////////////////////////////////////////////////////////////

View file

@ -185,10 +185,10 @@ void ClipRectangles(DRAW_CONTEXT* pDC,
simdscalari const& viewportIdx,
simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId);
Clipper<SIMD256, 3> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipRectangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1);
}
void ClipTriangles(DRAW_CONTEXT* pDC,
@ -200,10 +200,10 @@ void ClipTriangles(DRAW_CONTEXT* pDC,
simdscalari const& viewportIdx,
simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId);
Clipper<SIMD256, 3> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipTriangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1);
}
void ClipLines(DRAW_CONTEXT* pDC,
@ -215,10 +215,10 @@ void ClipLines(DRAW_CONTEXT* pDC,
simdscalari const& viewportIdx,
simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipLines, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId);
Clipper<SIMD256, 2> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipLines, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1);
}
void ClipPoints(DRAW_CONTEXT* pDC,
@ -230,10 +230,10 @@ void ClipPoints(DRAW_CONTEXT* pDC,
simdscalari const& viewportIdx,
simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipPoints, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId);
Clipper<SIMD256, 1> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipPoints, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1);
}
#if USE_SIMD16_FRONTEND
@ -246,7 +246,7 @@ void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC,
simd16scalari const& viewportIdx,
simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId);
enum
{
@ -258,7 +258,7 @@ void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC,
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipRectangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1);
}
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC,
@ -270,7 +270,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC,
simd16scalari const& viewportIdx,
simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId);
enum
{
@ -282,7 +282,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC,
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipTriangles, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1);
}
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC,
@ -294,7 +294,7 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC,
simd16scalari const& viewportIdx,
simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipLines, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId);
enum
{
@ -306,7 +306,7 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC,
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipLines, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1);
}
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC,
@ -318,7 +318,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC,
simd16scalari const& viewportIdx,
simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipPoints, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId);
enum
{
@ -330,7 +330,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC,
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
RDTSC_END(FEClipPoints, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1);
}
#endif

View file

@ -781,7 +781,7 @@ public:
if (clipMask)
{
RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId);
RDTSC_BEGIN(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
ClipSimd(prim,
@ -791,7 +791,7 @@ public:
primId,
viewportIdx,
rtIdx);
RDTSC_END(FEGuardbandClip, 1);
RDTSC_END(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, 1);
}
else if (validMask)
{

View file

@ -40,6 +40,7 @@
#include "core/fifo.hpp"
#include "core/knobs.h"
#include "common/intrin.h"
#include "common/rdtsc_buckets.h"
#include "core/threads.h"
#include "ringbuffer.h"
#include "archrast/archrast.h"
@ -523,14 +524,14 @@ struct SWR_CONTEXT
HotTileMgr* pHotTileMgr;
// Callback functions, passed in at create context time
PFN_LOAD_TILE pfnLoadTile;
PFN_STORE_TILE pfnStoreTile;
PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
PFN_MAKE_GFXPTR pfnMakeGfxPtr;
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
PFN_LOAD_TILE pfnLoadTile;
PFN_STORE_TILE pfnStoreTile;
PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
PFN_MAKE_GFXPTR pfnMakeGfxPtr;
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Global Stats
@ -550,6 +551,8 @@ struct SWR_CONTEXT
// ArchRast thread contexts.
HANDLE* pArContext;
BucketManager *pBucketMgr;
};
#define UPDATE_STAT_BE(name, count) \
@ -568,11 +571,11 @@ struct SWR_CONTEXT
#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
// Begin/end timing helpers layered on RDTSC_START / RDTSC_STOP.
// The drawid argument of RDTSC_BEGIN is accepted but not forwarded, and
// RDTSC_END always passes 0 as the last argument of RDTSC_STOP.
// Without KNOB_ENABLE_RDTSC both macros expand to nothing, so their
// arguments are never evaluated.
// NOTE(review): this listing shows two forms of each macro, one without and
// one with a leading pBucketMgr argument.
#ifdef KNOB_ENABLE_RDTSC
#define RDTSC_BEGIN(type, drawid) RDTSC_START(type)
#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0)
#define RDTSC_BEGIN(pBucketMgr, type, drawid) RDTSC_START(pBucketMgr, type)
#define RDTSC_END(pBucketMgr, type, count) RDTSC_STOP(pBucketMgr, type, count, 0)
#else
#define RDTSC_BEGIN(type, count)
#define RDTSC_END(type, count)
#define RDTSC_BEGIN(pBucketMgr, type, drawid)
#define RDTSC_END(pBucketMgr, type, count)
#endif
#ifdef KNOB_ENABLE_AR

View file

@ -135,7 +135,7 @@ void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, v
/// @todo This should go away when we switch this to use compute threading.
void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
{
RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEProcessStoreTiles, pDC->drawId);
MacroTileMgr* pTileMgr = pDC->pTileMgr;
STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
@ -160,7 +160,7 @@ void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t worker
}
}
RDTSC_END(FEProcessStoreTiles, 0);
RDTSC_END(pContext->pBucketMgr, FEProcessStoreTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
@ -175,7 +175,7 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext,
uint32_t workerId,
void* pUserData)
{
RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEProcessInvalidateTiles, pDC->drawId);
DISCARD_INVALIDATE_TILES_DESC* pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
MacroTileMgr* pTileMgr = pDC->pTileMgr;
@ -214,7 +214,7 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext,
}
}
RDTSC_END(FEProcessInvalidateTiles, 0);
RDTSC_END(pContext->pBucketMgr, FEProcessInvalidateTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
@ -518,7 +518,7 @@ static INLINE simd16scalari GenerateMask16(uint32_t numItemsRemaining)
static void StreamOut(
DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t workerId, uint32_t* pPrimData, uint32_t streamIndex)
{
RDTSC_BEGIN(FEStreamout, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEStreamout, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_STREAMOUT_STATE& soState = state.soState;
@ -598,7 +598,7 @@ static void StreamOut(
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
RDTSC_END(FEStreamout, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEStreamout, 1);
}
#if USE_SIMD16_FRONTEND
@ -834,7 +834,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC,
#endif
simdscalari const& primID)
{
RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEGeometryShader, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
@ -1178,7 +1178,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC,
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim * numInputPrims));
RDTSC_END(FEGeometryShader, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEGeometryShader, 1);
}
//////////////////////////////////////////////////////////////////////////
@ -1372,9 +1372,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
hsContext.mask = GenerateMask(numPrims);
// Run the HS
RDTSC_BEGIN(FEHullShader, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEHullShader, pDC->drawId);
state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext);
RDTSC_END(FEHullShader, 0);
RDTSC_END(pDC->pContext->pBucketMgr, FEHullShader, 0);
UPDATE_STAT_FE(HsInvocations, numPrims);
AR_EVENT(HSStats((HANDLE)&hsContext.stats));
@ -1385,10 +1385,10 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
{
// Run Tessellator
SWR_TS_TESSELLATED_DATA tsData = {0};
RDTSC_BEGIN(FETessellation, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FETessellation, pDC->drawId);
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
AR_EVENT(TessPrimCount(1));
RDTSC_END(FETessellation, 0);
RDTSC_END(pDC->pContext->pBucketMgr, FETessellation, 0);
if (tsData.NumPrimitives == 0)
{
@ -1441,9 +1441,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
{
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
RDTSC_BEGIN(FEDomainShader, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEDomainShader, pDC->drawId);
state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
RDTSC_END(FEDomainShader, 0);
RDTSC_END(pDC->pContext->pBucketMgr, FEDomainShader, 0);
AR_EVENT(DSStats((HANDLE)&dsContext.stats));
@ -1524,14 +1524,14 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
#else
simdvector prim[3]; // Only deal with triangles, lines, or points
#endif
RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEPAAssemble, pDC->drawId);
bool assemble =
#if USE_SIMD16_FRONTEND
tessPa.Assemble(VERTEX_POSITION_SLOT, prim_simd16);
#else
tessPa.Assemble(VERTEX_POSITION_SLOT, prim);
#endif
RDTSC_END(FEPAAssemble, 1);
RDTSC_END(pDC->pContext->pBucketMgr, FEPAAssemble, 1);
SWR_ASSERT(assemble);
SWR_ASSERT(pfnClipFunc);
@ -1663,7 +1663,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
}
#endif
RDTSC_BEGIN(FEProcessDraw, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEProcessDraw, pDC->drawId);
void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
@ -1895,7 +1895,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
#endif
}
// 1. Execute FS/VS for a single SIMD.
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId);
#if USE_SIMD16_SHADERS
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin);
#else
@ -1906,7 +1906,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
}
#endif
RDTSC_END(FEFetchShader, 0);
RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
#if USE_SIMD16_SHADERS
@ -1950,7 +1950,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
if (!KNOB_TOSS_FETCH)
#endif
{
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId);
#if USE_SIMD16_VS
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
@ -1964,7 +1964,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
AR_EVENT(VSStats((HANDLE)&vsContext_hi.stats));
}
#endif
RDTSC_END(FEVertexShader, 0);
RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
}
@ -1975,9 +1975,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
{
simd16vector prim_simd16[MAX_NUM_VERTS_PER_PRIM];
RDTSC_START(FEPAAssemble);
RDTSC_START(pContext->pBucketMgr, FEPAAssemble);
bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim_simd16);
RDTSC_STOP(FEPAAssemble, 1, 0);
RDTSC_STOP(pContext->pBucketMgr, FEPAAssemble, 1, 0);
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_FETCH)
@ -2190,9 +2190,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
if (i < endVertex)
{
// 1. Execute FS/VS for a single SIMD.
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId);
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
RDTSC_END(FEFetchShader, 0);
RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
vsContext.VertexID = fetchInfo.VertexID;
@ -2212,9 +2212,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
if (!KNOB_TOSS_FETCH)
#endif
{
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId);
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext);
RDTSC_END(FEVertexShader, 0);
RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
AR_EVENT(VSStats((HANDLE)&vsContext.stats));
@ -2226,9 +2226,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
{
simdvector prim[MAX_NUM_VERTS_PER_PRIM];
// PaAssemble returns false if there is not enough verts to assemble.
RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, FEPAAssemble, pDC->drawId);
bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim);
RDTSC_END(FEPAAssemble, 1);
RDTSC_END(pContext->pBucketMgr, FEPAAssemble, 1);
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_FETCH)
@ -2339,7 +2339,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
#endif
RDTSC_END(FEProcessDraw, numPrims * work.numInstances);
RDTSC_END(pContext->pBucketMgr, FEProcessDraw, numPrims * work.numInstances);
}
struct FEDrawChooser

View file

@ -53,7 +53,7 @@ void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, voi
#endif
// bloat line to two tris and call the triangle rasterizer twice
RDTSC_BEGIN(BERasterizeLine, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@ -245,7 +245,7 @@ void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, voi
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
RDTSC_BEGIN(BERasterizeLine, 1);
// Stop the BERasterizeLine bucket that was started earlier in RasterizeLine;
// the trailing 1 is the count argument, matching the other RDTSC_END call sites.
RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeLine, 1);
}
void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
@ -308,9 +308,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTi
renderBuffers,
triDesc.triFlags.renderTargetArrayIndex);
RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
RDTSC_END(BEPixelBackend, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0);
}
void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)

View file

@ -842,10 +842,10 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
}
// not trivial accept or reject, must rasterize full tile
RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId);
innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
pDC, startQuadEdgesAdj, pRastEdges);
RDTSC_END(BERasterizePartial, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0);
}
};
@ -927,8 +927,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
return;
}
#endif
RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId);
RDTSC_BEGIN(BETriangleSetup, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeTriangle, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BETriangleSetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@ -1103,7 +1103,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 &&
intersect.ymax >= 0);
RDTSC_END(BETriangleSetup, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BETriangleSetup, 0);
// update triangle desc
uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
@ -1115,12 +1115,12 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
if (numTilesX == 0 || numTilesY == 0)
{
RDTSC_EVENT(BEEmptyTriangle, 1, 0);
RDTSC_END(BERasterizeTriangle, 1);
RDTSC_EVENT(pDC->pContext->pBucketMgr, BEEmptyTriangle, 1, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1);
return;
}
RDTSC_BEGIN(BEStepSetup, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStepSetup, pDC->drawId);
// Step to pixel center of top-left pixel of the triangle bbox
// Align intersect bbox (top/left) to raster tile's (top/left).
@ -1232,7 +1232,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
}
}
RDTSC_END(BEStepSetup, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEStepSetup, 0);
uint32_t tY = minTileY;
uint32_t tX = minTileX;
@ -1297,7 +1297,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
{
triDesc.innerCoverageMask = 0xffffffffffffffffULL;
}
RDTSC_EVENT(BETrivialAccept, 1, 0);
RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialAccept, 1, 0);
}
else
{
@ -1339,11 +1339,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
}
// not trivial accept or reject, must rasterize full tile
RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId);
triDesc.coverageMask[sampleNum] =
rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
pDC, startQuadEdges, rastEdges);
RDTSC_END(BERasterizePartial, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0);
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
@ -1362,7 +1362,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
{
triDesc.coverageMask[sampleNum] = 0;
}
RDTSC_EVENT(BETrivialReject, 1, 0);
RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialReject, 1, 0);
}
}
@ -1389,14 +1389,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// Track rasterized subspans
AR_EVENT(RasterTileCount(pDC->drawId, 1));
RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC,
workerId,
tileX << KNOB_TILE_X_DIM_SHIFT,
tileY << KNOB_TILE_Y_DIM_SHIFT,
triDesc,
renderBuffers);
RDTSC_END(BEPixelBackend, 0);
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0);
}
// step to the next tile in X
@ -1417,7 +1417,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow);
}
RDTSC_END(BERasterizeTriangle, 1);
RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1);
}
// Get pointers to hot tile memory for color RT, depth, stencil

View file

@ -92,9 +92,3 @@ BUCKET_DESC gCoreBuckets[] = {
static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])),
"RDTSC Bucket enum and description table size mismatched.");
/// @todo bucketmanager and mapping should probably be a part of the SWR context
std::vector<uint32_t> gBucketMap;
BucketManager gBucketMgr;
uint32_t gCurrentFrame = 0;
bool gBucketsInitialized = false;

View file

@ -100,90 +100,86 @@ enum CORE_BUCKETS
NumBuckets
};
// Forward declarations of the bucket-timing entry points; the INLINE
// definitions appear further down in this header.
// NOTE(review): this listing shows two forms of each declaration, one without
// and one with a leading BucketManager* argument.
void rdtscReset();
void rdtscInit(int threadId);
void rdtscStart(uint32_t bucketId);
void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId);
void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2);
void rdtscEndFrame();
void rdtscReset(BucketManager* pBucketMgr);
void rdtscInit(BucketManager* pBucketMgr, int threadId);
void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId);
void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId);
void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2);
void rdtscEndFrame(BucketManager* pBucketMgr);
// Instrumentation macros. With KNOB_ENABLE_RDTSC defined, each macro forwards
// its arguments unchanged to the rdtsc*() function of the same name.
// Otherwise every macro expands to nothing, so call sites compile away and
// their arguments are never evaluated.
#ifdef KNOB_ENABLE_RDTSC
#define RDTSC_RESET() rdtscReset()
#define RDTSC_INIT(threadId) rdtscInit(threadId)
#define RDTSC_START(bucket) rdtscStart(bucket)
#define RDTSC_STOP(bucket, count, draw) rdtscStop(bucket, count, draw)
#define RDTSC_EVENT(bucket, count1, count2) rdtscEvent(bucket, count1, count2)
#define RDTSC_ENDFRAME() rdtscEndFrame()
#define RDTSC_RESET(pBucketMgr) rdtscReset(pBucketMgr)
#define RDTSC_INIT(pBucketMgr, threadId) rdtscInit(pBucketMgr,threadId)
#define RDTSC_START(pBucketMgr, bucket) rdtscStart(pBucketMgr, bucket)
#define RDTSC_STOP(pBucketMgr, bucket, count, draw) rdtscStop(pBucketMgr, bucket, count, draw)
#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) rdtscEvent(pBucketMgr, bucket, count1, count2)
#define RDTSC_ENDFRAME(pBucketMgr) rdtscEndFrame(pBucketMgr)
#else
// Disabled build: same macro names and arity, empty expansions.
#define RDTSC_RESET()
#define RDTSC_INIT(threadId)
#define RDTSC_START(bucket)
#define RDTSC_STOP(bucket, count, draw)
#define RDTSC_EVENT(bucket, count1, count2)
#define RDTSC_ENDFRAME()
#define RDTSC_RESET(pBucketMgr)
#define RDTSC_INIT(pBucketMgr, threadId)
#define RDTSC_START(pBucketMgr, bucket)
#define RDTSC_STOP(pBucketMgr, bucket, count, draw)
#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2)
#define RDTSC_ENDFRAME(pBucketMgr)
#endif
// External state referenced by the inline rdtsc*() helpers below.
extern std::vector<uint32_t> gBucketMap;
extern BucketManager gBucketMgr;
// Table of core bucket descriptions; rdtscInit() registers one bucket per entry.
extern BUCKET_DESC gCoreBuckets[];
extern uint32_t gCurrentFrame;
extern bool gBucketsInitialized;
//////////////////////////////////////////////////////////////////////////
/// @brief Restart bucket bookkeeping: zero the frame counter and call
///        ClearThreads() on the bucket manager.
/// @param pBucketMgr - bucket manager to reset; dereferenced without a null check.
INLINE void rdtscReset()
INLINE void rdtscReset(BucketManager *pBucketMgr)
{
gCurrentFrame = 0;
gBucketMgr.ClearThreads();
pBucketMgr->mCurrentFrame = 0;
pBucketMgr->ClearThreads();
}
//////////////////////////////////////////////////////////////////////////
/// @brief Register a thread entry with the bucket manager. The first call
///        made with threadId 0 also registers every core bucket.
/// @param pBucketMgr - bucket manager to register with; dereferenced without a null check.
/// @param threadId - 0 registers the thread under the name "API", any other
///                   value under the name "WORKER".
INLINE void rdtscInit(int threadId)
INLINE void rdtscInit(BucketManager* pBucketMgr, int threadId)
{
// register all the buckets once
// Only thread 0 populates the map, and only while the initialized flag is still false.
if (!gBucketsInitialized && (threadId == 0))
if (!pBucketMgr->mBucketsInitialized && (threadId == 0))
{
gBucketMap.resize(NumBuckets);
pBucketMgr->mBucketMap.resize(NumBuckets);
for (uint32_t i = 0; i < NumBuckets; ++i)
{
// Slot i stores the id RegisterBucket() returned for core bucket i.
gBucketMap[i] = gBucketMgr.RegisterBucket(gCoreBuckets[i]);
pBucketMgr->mBucketMap[i] = pBucketMgr->RegisterBucket(gCoreBuckets[i]);
}
gBucketsInitialized = true;
pBucketMgr->mBucketsInitialized = true;
}
// Every call registers a thread, whether or not the buckets were registered above.
std::string name = threadId == 0 ? "API" : "WORKER";
gBucketMgr.RegisterThread(name);
pBucketMgr->RegisterThread(name);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Translate a core bucket index to its registered id and call
///        StartBucket() with it.
/// @param pBucketMgr - bucket manager owning the bucket map.
/// @param bucketId - index into the bucket map; not range-checked here.
INLINE void rdtscStart(uint32_t bucketId)
INLINE void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId)
{
uint32_t id = gBucketMap[bucketId];
gBucketMgr.StartBucket(id);
uint32_t id = pBucketMgr->mBucketMap[bucketId];
pBucketMgr->StartBucket(id);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Translate a core bucket index to its registered id and call
///        StopBucket() with it.
/// @param pBucketMgr - bucket manager owning the bucket map.
/// @param bucketId - index into the bucket map; not range-checked here.
/// @param count - accepted but not used by this function.
/// @param drawId - accepted but not used by this function.
INLINE void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId)
INLINE void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId)
{
uint32_t id = gBucketMap[bucketId];
gBucketMgr.StopBucket(id);
uint32_t id = pBucketMgr->mBucketMap[bucketId];
pBucketMgr->StopBucket(id);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Translate a core bucket index to its registered id and call
///        AddEvent() with that id and count1.
/// @param pBucketMgr - bucket manager owning the bucket map.
/// @param bucketId - index into the bucket map; not range-checked here.
/// @param count1 - value forwarded to AddEvent().
/// @param count2 - accepted but not used by this function.
INLINE void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2)
INLINE void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2)
{
uint32_t id = gBucketMap[bucketId];
gBucketMgr.AddEvent(id, count1);
uint32_t id = pBucketMgr->mBucketMap[bucketId];
pBucketMgr->AddEvent(id, count1);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Advance the frame counter and drive the capture window: capture
///        starts when the counter reaches KNOB_BUCKETS_START_FRAME and is
///        stopped and reported when it reaches KNOB_BUCKETS_END_FRAME.
/// @param pBucketMgr - bucket manager whose frame counter is advanced.
INLINE void rdtscEndFrame()
INLINE void rdtscEndFrame(BucketManager* pBucketMgr)
{
gCurrentFrame++;
pBucketMgr->mCurrentFrame++;
// Both checks require START < END, so no capture happens when the start
// frame does not precede the end frame.
if (gCurrentFrame == KNOB_BUCKETS_START_FRAME &&
if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_START_FRAME &&
KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
{
gBucketMgr.StartCapture();
pBucketMgr->StartCapture();
}
if (gCurrentFrame == KNOB_BUCKETS_END_FRAME &&
if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_END_FRAME &&
KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
{
gBucketMgr.StopCapture();
gBucketMgr.PrintReport("rdtsc.txt");
pBucketMgr->StopCapture();
// NOTE(review): the report name passed here is fixed; if several bucket
// managers can exist, confirm PrintReport() keeps their output apart.
pBucketMgr->PrintReport("rdtsc.txt");
}
}

View file

@ -31,6 +31,7 @@
#include "common/formats.h"
#include "common/intrin.h"
#include "common/rdtsc_buckets.h"
#include <functional>
#include <algorithm>
@ -381,6 +382,8 @@ struct SWR_PS_CONTEXT
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
BucketManager *pBucketManager; // @llvm_struct - IN: performance buckets.
};
//////////////////////////////////////////////////////////////////////////

View file

@ -609,7 +609,7 @@ bool WorkOnFifoBE(SWR_CONTEXT* pContext,
{
BE_WORK* pWork;
RDTSC_BEGIN(WorkerFoundWork, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, WorkerFoundWork, pDC->drawId);
uint32_t numWorkItems = tile->getNumQueued();
SWR_ASSERT(numWorkItems);
@ -630,7 +630,7 @@ bool WorkOnFifoBE(SWR_CONTEXT* pContext,
pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
tile->dequeue();
}
RDTSC_END(WorkerFoundWork, numWorkItems);
RDTSC_END(pContext->pBucketMgr, WorkerFoundWork, numWorkItems);
_ReadWriteBarrier();
@ -868,7 +868,7 @@ DWORD workerThreadMain(LPVOID pData)
SetCurrentThreadName(threadName);
}
RDTSC_INIT(threadId);
RDTSC_INIT(pContext->pBucketMgr, threadId);
// Only need offset numa index from base for correct masking
uint32_t numaNode = pThreadData->numaId - pContext->threadInfo.BASE_NUMA_NODE;
@ -936,10 +936,10 @@ DWORD workerThreadMain(LPVOID pData)
if (IsBEThread)
{
RDTSC_BEGIN(WorkerWorkOnFifoBE, 0);
RDTSC_BEGIN(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0);
bShutdown |=
WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
RDTSC_END(WorkerWorkOnFifoBE, 0);
RDTSC_END(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0);
WorkOnCompute(pContext, workerId, curDrawBE);
}

View file

@ -368,7 +368,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC),
hWorkerPrivateData,
@ -379,15 +379,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
pHotTile->renderTargetArrayIndex,
pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
// Clear the tile.
ClearColorHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
}
colorHottileEnableMask &= ~(1 << rtSlot);
}
@ -399,7 +399,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC),
hWorkerPrivateData,
@ -410,15 +410,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
pHotTile->renderTargetArrayIndex,
pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
// Clear the tile.
ClearDepthHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
}
}
@ -429,7 +429,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC),
hWorkerPrivateData,
@ -440,15 +440,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
pHotTile->renderTargetArrayIndex,
pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
// Clear the tile.
ClearStencilHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
}
}
}

View file

@ -489,8 +489,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT;
SWR_CREATECONTEXT_INFO createInfo;
memset(&createInfo, 0, sizeof(createInfo));
SWR_CREATECONTEXT_INFO createInfo {0};
createInfo.privateStateSize = sizeof(swr_draw_context);
createInfo.pfnLoadTile = swr_LoadHotTile;
createInfo.pfnStoreTile = swr_StoreHotTile;