mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-01 06:50:22 +01:00
swr/rast: FP consistency between POSH/RENDER pipes
- Ensure all threads have optimal floating-point control state
- Disable auto-generation of fused FP ops for VERTEX shader stage
- Disable "fast" FP ops for VERTEX shader stage

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
dc7b3c95a4
commit
0b4db43705
4 changed files with 33 additions and 11 deletions
|
|
@ -294,4 +294,25 @@ int SWR_API
|
|||
std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text
|
||||
const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
|
||||
|
||||
|
||||
/// Helper for setting up FP state: reprograms the vector control/status
/// register (read with _mm_getcsr, written with _mm_setcsr) to use
/// round-to-nearest with flush-to-zero and denormals-are-zero enabled.
/// Bits outside the rounding, FTZ and DAZ fields are carried over unchanged.
|
||||
/// @returns old csr state, i.e. the value read before any modification;
///          pass it to RestoreVectorCSR() to undo this call.
|
||||
static INLINE uint32_t SetOptimalVectorCSR()
|
||||
{
|
||||
// Snapshot the current register value so the caller can restore it later.
uint32_t oldCSR = _mm_getcsr();
|
||||
|
||||
// Clear the rounding-mode, DAZ and FTZ fields so the bits OR'ed in below fully determine them.
uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK));
|
||||
// Select round-to-nearest and turn on both flush-to-zero and denormals-are-zero.
newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
|
||||
_mm_setcsr(newCSR);
|
||||
|
||||
return oldCSR;
|
||||
}
|
||||
|
||||
/// Set Vector CSR state by writing the given value verbatim with _mm_setcsr().
|
||||
/// @param csrState - should be value returned from SetOptimalVectorCSR()
///                   (the register contents read before that call modified them)
|
||||
static INLINE void RestoreVectorCSR(uint32_t csrState)
|
||||
{
|
||||
_mm_setcsr(csrState);
|
||||
}
|
||||
|
||||
#endif //__SWR_OS_H__
|
||||
|
|
|
|||
|
|
@ -250,9 +250,7 @@ void QueueWork(SWR_CONTEXT* pContext)
|
|||
|
||||
if (pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
// flush denormals to 0
|
||||
uint32_t mxcsr = _mm_getcsr();
|
||||
_mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
|
||||
uint32_t mxcsr = SetOptimalVectorCSR();
|
||||
|
||||
if (IsDraw)
|
||||
{
|
||||
|
|
@ -274,7 +272,7 @@ void QueueWork(SWR_CONTEXT* pContext)
|
|||
}
|
||||
|
||||
// restore csr
|
||||
_mm_setcsr(mxcsr);
|
||||
RestoreVectorCSR(mxcsr);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1840,10 +1840,10 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
|
|||
{
|
||||
vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
|
||||
|
||||
fetchInfo_lo.xpIndices =
|
||||
pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex);
|
||||
fetchInfo_hi.xpIndices =
|
||||
pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t)); // 1/2 of KNOB_SIMD16_WIDTH
|
||||
fetchInfo_lo.xpIndices = pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex);
|
||||
fetchInfo_hi.xpIndices = pDC->pContext->pfnMakeGfxPtr(
|
||||
GetPrivateState(pDC),
|
||||
&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t)); // 1/2 of KNOB_SIMD16_WIDTH
|
||||
}
|
||||
|
||||
fetchInfo_lo.CurInstance = instanceNum;
|
||||
|
|
|
|||
|
|
@ -421,9 +421,9 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CON
|
|||
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
|
||||
{
|
||||
stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
|
||||
|
||||
stats.PsInvocations += dynState.pStats[i].PsInvocations;
|
||||
stats.CsInvocations += dynState.pStats[i].CsInvocations;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -439,6 +439,10 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONT
|
|||
pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
|
||||
pDC->retireCallback.userData2,
|
||||
pDC->retireCallback.userData3);
|
||||
|
||||
// Callbacks to external code *could* change floating point control state
|
||||
// Reset our optimal flags
|
||||
SetOptimalVectorCSR();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -870,8 +874,7 @@ DWORD workerThreadMain(LPVOID pData)
|
|||
uint32_t numaNode = pThreadData->numaId - pContext->threadInfo.BASE_NUMA_NODE;
|
||||
uint32_t numaMask = pContext->threadPool.numaMask;
|
||||
|
||||
// flush denormals to 0
|
||||
_mm_setcsr(_mm_getcsr() | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
|
||||
SetOptimalVectorCSR();
|
||||
|
||||
// Track tiles locked by other threads. If we try to lock a macrotile and find it's already
|
||||
// locked then we'll add it to this list so that we don't try and lock it again.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue