mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 23:18:20 +02:00
swr: [rasterizer] Backend code adjustments
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
e4d1294afb
commit
f445b6de9c
5 changed files with 70 additions and 45 deletions
|
|
@ -659,6 +659,15 @@ simdscalar vMask(int32_t mask)
|
|||
return _simd_castsi_ps(vec);
|
||||
}
|
||||
|
||||
INLINE
|
||||
simdscalari vMaski(int32_t mask)
|
||||
{
|
||||
__m256i vec = _mm256_set1_epi32(mask);
|
||||
const __m256i bit = _mm256_set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
|
||||
vec = _simd_and_si(vec, bit);
|
||||
return _simd_cmplt_epi32(_mm256_setzero_si256(), vec);
|
||||
}
|
||||
|
||||
INLINE
|
||||
void _simd_mov(simdscalar &r, unsigned int rlane, simdscalar& s, unsigned int slane)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -806,7 +806,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
|
||||
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
|
||||
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
|
||||
|
||||
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
|
||||
|
||||
// select backend function
|
||||
|
|
@ -817,7 +816,9 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
{
|
||||
// always need to generate I & J per sample for Z interpolation
|
||||
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
|
||||
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ];
|
||||
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage]
|
||||
[centroid][forcedSampleCount][canEarlyZ]
|
||||
;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@
|
|||
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
|
||||
static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Process compute work.
|
||||
/// @param pDC - pointer to draw context (dispatch).
|
||||
|
|
|
|||
|
|
@ -605,8 +605,10 @@ struct PixelRateZTestLoop
|
|||
|
||||
if(psState.writesODepth)
|
||||
{
|
||||
// broadcast and test oDepth(psContext.vZ) written from the PS for each sample
|
||||
vZ[sample] = psContext.vZ;
|
||||
{
|
||||
// broadcast and test oDepth(psContext.vZ) written from the PS for each sample
|
||||
vZ[sample] = psContext.vZ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -713,23 +715,26 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
|
|||
uint8_t *pColorSample = pColorBase[rt] + rasterTileColorOffset;
|
||||
|
||||
const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
|
||||
// pfnBlendFunc may not update all channels. Initialize with PS output.
|
||||
/// TODO: move this into the blend JIT.
|
||||
blendOut = psContext.shaded[rt];
|
||||
|
||||
// Blend outputs and update coverage mask for alpha test
|
||||
if(pfnBlendFunc[rt] != nullptr)
|
||||
{
|
||||
pfnBlendFunc[rt](
|
||||
pBlendState,
|
||||
psContext.shaded[rt],
|
||||
psContext.shaded[1],
|
||||
psContext.shaded[0].w,
|
||||
sample,
|
||||
pColorSample,
|
||||
blendOut,
|
||||
&psContext.oMask,
|
||||
(simdscalari*)&coverageMask);
|
||||
// pfnBlendFunc may not update all channels. Initialize with PS output.
|
||||
/// TODO: move this into the blend JIT.
|
||||
blendOut = psContext.shaded[rt];
|
||||
|
||||
// Blend outputs and update coverage mask for alpha test
|
||||
if(pfnBlendFunc[rt] != nullptr)
|
||||
{
|
||||
pfnBlendFunc[rt](
|
||||
pBlendState,
|
||||
psContext.shaded[rt],
|
||||
psContext.shaded[1],
|
||||
psContext.shaded[0].w,
|
||||
sample,
|
||||
pColorSample,
|
||||
blendOut,
|
||||
&psContext.oMask,
|
||||
(simdscalari*)&coverageMask);
|
||||
}
|
||||
}
|
||||
|
||||
// final write mask
|
||||
|
|
@ -782,9 +787,6 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
|
|||
simdscalar *pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
|
||||
|
||||
const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
|
||||
// pfnBlendFunc may not update all channels. Initialize with PS output.
|
||||
/// TODO: move this into the blend JIT.
|
||||
blendOut = psContext.shaded[rt];
|
||||
|
||||
if (colorBufferBit & colorBufferEnableMask)
|
||||
{
|
||||
|
|
@ -794,19 +796,25 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
|
|||
blendSrc[3] = pColorSample[6];
|
||||
}
|
||||
|
||||
// Blend outputs and update coverage mask for alpha test
|
||||
if (pfnBlendFunc[rt] != nullptr)
|
||||
{
|
||||
pfnBlendFunc[rt](
|
||||
pBlendState,
|
||||
psContext.shaded[rt],
|
||||
psContext.shaded[1],
|
||||
psContext.shaded[0].w,
|
||||
sample,
|
||||
reinterpret_cast<uint8_t *>(&blendSrc),
|
||||
blendOut,
|
||||
&psContext.oMask,
|
||||
reinterpret_cast<simdscalari *>(&coverageMask));
|
||||
// pfnBlendFunc may not update all channels. Initialize with PS output.
|
||||
/// TODO: move this into the blend JIT.
|
||||
blendOut = psContext.shaded[rt];
|
||||
|
||||
// Blend outputs and update coverage mask for alpha test
|
||||
if(pfnBlendFunc[rt] != nullptr)
|
||||
{
|
||||
pfnBlendFunc[rt](
|
||||
pBlendState,
|
||||
psContext.shaded[rt],
|
||||
psContext.shaded[1],
|
||||
psContext.shaded[0].w,
|
||||
sample,
|
||||
reinterpret_cast<uint8_t *>(&blendSrc),
|
||||
blendOut,
|
||||
&psContext.oMask,
|
||||
reinterpret_cast<simdscalari *>(&coverageMask));
|
||||
}
|
||||
}
|
||||
|
||||
// final write mask
|
||||
|
|
@ -840,6 +848,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
|
|||
template<typename T>
|
||||
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
///@todo: Need to move locals off stack to prevent __chkstk's from being generated for the backend
|
||||
|
||||
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BEPixelRateBackend, pDC->drawId);
|
||||
|
|
@ -850,12 +861,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
SetupPixelShaderContext<T>(&psContext, work);
|
||||
|
||||
uint8_t *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.rastState.clipDistanceMask);
|
||||
|
|
@ -975,10 +986,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
|
||||
// broadcast the results of the PS to all passing pixels
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else
|
||||
OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
|
||||
#endif
|
||||
OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else // USE_8x2_TILE_BACKEND
|
||||
OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
|
||||
#endif // USE_8x2_TILE_BACKEND
|
||||
|
||||
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
|
||||
{
|
||||
|
|
@ -1009,13 +1020,13 @@ Endtile:
|
|||
{
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for(uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
|
|
@ -1035,7 +1046,8 @@ Endtile:
|
|||
}
|
||||
|
||||
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
|
||||
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0>
|
||||
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
|
||||
>
|
||||
struct SwrBackendTraits
|
||||
{
|
||||
static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
|
||||
|
|
|
|||
|
|
@ -330,6 +330,8 @@ struct SWR_PS_CONTEXT
|
|||
|
||||
uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
|
||||
|
||||
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS];
|
||||
// IN: Pointers to render target hottiles
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -511,6 +513,7 @@ struct SWR_SURFACE_STATE
|
|||
uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc.
|
||||
SWR_AUX_MODE auxMode; // @llvm_enum
|
||||
|
||||
|
||||
bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
|
||||
};
|
||||
|
||||
|
|
@ -1087,7 +1090,6 @@ struct SWR_PS_STATE
|
|||
uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
|
||||
uint32_t usesUAV : 1; // pixel shader accesses UAV
|
||||
uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
|
||||
|
||||
};
|
||||
|
||||
// depth bounds state
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue