mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-09 21:20:14 +01:00
swr: [rasterizer core] per-primitive viewports/scissors
- use per-primitive viewports throughout the pipeline. - track whether all available scissor rects are tile aligned. Causes failures, so not taken into account when choosing rasterizer yet. Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
This commit is contained in:
parent
63ed11cde9
commit
b473bec878
7 changed files with 215 additions and 72 deletions
|
|
@ -727,34 +727,52 @@ void SwrSetScissorRects(
|
|||
//////////////////////////////////////////////////////////////////////////
/// @brief Builds the effective scissor rect for each active viewport and
///        stores it in pState->scissorsInFixedPoint, scaled by
///        FIXED_POINT_SCALE with inclusive xmax/ymax.
///        Also records in pState->scissorsTileAligned whether every
///        processed rect has all four edges on tile boundaries.
/// @param pDC - draw context whose API state is read and updated in place.
void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
{
    API_STATE *pState = &pDC->pState->state;

    // Only entry 0 is processed unless the GS emits a viewport array index.
    const uint32_t numScissors = pState->gsState.emitsViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;

    // Start optimistic; any rect that is not tile aligned clears the flag.
    pState->scissorsTileAligned = true;

    for (uint32_t index = 0; index < numScissors; ++index)
    {
        SWR_RECT &scissorInFixedPoint = pState->scissorsInFixedPoint[index];

        // Set up scissor dimensions based on scissor or viewport
        if (pState->rastState.scissorEnable)
        {
            scissorInFixedPoint = pState->scissorRects[index];
        }
        else
        {
            // the vp width and height must be added to origin un-rounded then the result round to -inf.
            // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
            scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x;
            scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width);
            scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y;
            scissorInFixedPoint.ymax = (int32_t)(pState->vp[index].y + pState->vp[index].height);
        }

        // Clamp to max rect
        scissorInFixedPoint &= g_MaxScissorRect;

        // Test for tile alignment. This runs before the fixed-point scale and
        // before xmax/ymax are made inclusive. Each edge is checked against
        // the tile dimension of its own axis (ymax against KNOB_TILE_Y_DIM).
        const bool tileAligned =
            (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0 &&
            (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0 &&
            (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0 &&
            (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0;

        pState->scissorsTileAligned &= tileAligned;

        // Scale to fixed point
        scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
        scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
        scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
        scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;

        // Make scissor inclusive
        scissorInFixedPoint.xmax -= 1;
        scissorInFixedPoint.ymax -= 1;
    }
}
|
||||
|
||||
// templated backend function tables
|
||||
|
|
|
|||
|
|
@ -493,14 +493,14 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
if(T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
|
||||
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
|
||||
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
|
||||
|
||||
// early-exit if no pixels passed depth or earlyZ is forced on
|
||||
if(pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
|
|
@ -525,14 +525,14 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
if(!T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BELateDepthTest);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
|
||||
RDTSC_STOP(BELateDepthTest, 0, 0);
|
||||
|
||||
if(!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
// need to call depth/stencil write for stencil write
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
|
||||
goto Endtile;
|
||||
}
|
||||
|
|
@ -549,7 +549,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
// do final depth write after all pixel kills
|
||||
if (!pPSState->forceEarlyZ)
|
||||
{
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
|
||||
}
|
||||
RDTSC_STOP(BEOutputMerger, 0, 0);
|
||||
|
|
@ -712,14 +712,14 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
if (T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
|
||||
|
||||
// early-exit if no samples passed depth or earlyZ is forced on.
|
||||
if (pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
|
|
@ -745,14 +745,14 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
if (!T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BELateDepthTest);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
RDTSC_STOP(BELateDepthTest, 0, 0);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
// need to call depth/stencil write for stencil write
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
|
|
@ -771,7 +771,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
// do final depth write after all pixel kills
|
||||
if (!pPSState->forceEarlyZ)
|
||||
{
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
}
|
||||
RDTSC_STOP(BEOutputMerger, 0, 0);
|
||||
|
|
@ -984,7 +984,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
|
||||
uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
|
||||
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
|
||||
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
|
||||
}
|
||||
RDTSC_STOP(BEOutputMerger, 0, 0);
|
||||
|
|
@ -1093,9 +1093,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
|||
uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
|
||||
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
|
||||
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -491,14 +491,15 @@ struct PixelRateZTestLoop
|
|||
RDTSC_START(BEDepthBucket);
|
||||
depthPassMask[sample] = vCoverageMask[sample];
|
||||
stencilPassMask[sample] = vCoverageMask[sample];
|
||||
depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, vZ[sample], pDepthSample,
|
||||
vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]);
|
||||
depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
vZ[sample], pDepthSample, vCoverageMask[sample],
|
||||
pStencilSample, &stencilPassMask[sample]);
|
||||
RDTSC_STOP(BEDepthBucket, 0, 0);
|
||||
|
||||
// early-exit if no pixels passed depth or earlyZ is forced on
|
||||
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
|
||||
{
|
||||
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
|
||||
pDepthSample, depthPassMask[sample], vCoverageMask[sample], pStencilSample, stencilPassMask[sample]);
|
||||
|
||||
if(!_simd_movemask_ps(depthPassMask[sample]))
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ struct TRI_FLAGS
|
|||
float pointSize;
|
||||
uint32_t primID;
|
||||
uint32_t renderTargetArrayIndex;
|
||||
uint32_t viewportIndex;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -274,7 +275,8 @@ OSALIGNLINE(struct) API_STATE
|
|||
SWR_VIEWPORT_MATRICES vpMatrices;
|
||||
|
||||
SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
|
||||
SWR_RECT scissorInFixedPoint;
|
||||
SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
|
||||
bool scissorsTileAligned;
|
||||
|
||||
// Backend state
|
||||
SWR_BACKEND_STATE backendState;
|
||||
|
|
|
|||
|
|
@ -117,14 +117,14 @@ simdscalar QuantizeDepth(simdscalar depth)
|
|||
|
||||
INLINE
|
||||
simdscalar DepthStencilTest(const API_STATE* pState,
|
||||
bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, uint8_t *pStencilBase,
|
||||
simdscalar* pStencilMask)
|
||||
bool frontFacing, uint32_t viewportIndex, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask,
|
||||
uint8_t *pStencilBase, simdscalar* pStencilMask)
|
||||
{
|
||||
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
|
||||
static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
|
||||
|
||||
const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
|
||||
const SWR_VIEWPORT* pViewport = &pState->vp[0];
|
||||
const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
|
||||
|
||||
simdscalar depthResult = _simd_set1_ps(-1.0f);
|
||||
simdscalar zbuf;
|
||||
|
|
|
|||
|
|
@ -465,6 +465,70 @@ static INLINE simdscalari GenerateMask(uint32_t numItemsRemaining)
|
|||
return _simd_castps_si(vMask(mask));
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Gather scissor rect data based on per-prim viewport indices.
|
||||
/// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point.
|
||||
/// @param pViewportIndex - array of per-primitive vewport indexes.
|
||||
/// @param scisXmin - output vector of per-prmitive scissor rect Xmin data.
|
||||
/// @param scisYmin - output vector of per-prmitive scissor rect Ymin data.
|
||||
/// @param scisXmax - output vector of per-prmitive scissor rect Xmax data.
|
||||
/// @param scisYmax - output vector of per-prmitive scissor rect Ymax data.
|
||||
//
|
||||
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
|
||||
template<size_t SimdWidth>
|
||||
struct GatherScissors
|
||||
{
|
||||
static void Gather(const SWR_RECT* pScissorsInFixedPoint, const uint32_t* pViewportIndex,
|
||||
simdscalari &scisXmin, simdscalari &scisYmin,
|
||||
simdscalari &scisXmax, simdscalari &scisYmax)
|
||||
{
|
||||
SWR_ASSERT(0, "Unhandled Simd Width in Scissor Rect Gather");
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct GatherScissors<8>
|
||||
{
|
||||
static void Gather(const SWR_RECT* pScissorsInFixedPoint, const uint32_t* pViewportIndex,
|
||||
simdscalari &scisXmin, simdscalari &scisYmin,
|
||||
simdscalari &scisXmax, simdscalari &scisYmax)
|
||||
{
|
||||
scisXmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].xmin,
|
||||
pScissorsInFixedPoint[pViewportIndex[1]].xmin,
|
||||
pScissorsInFixedPoint[pViewportIndex[2]].xmin,
|
||||
pScissorsInFixedPoint[pViewportIndex[3]].xmin,
|
||||
pScissorsInFixedPoint[pViewportIndex[4]].xmin,
|
||||
pScissorsInFixedPoint[pViewportIndex[5]].xmin,
|
||||
pScissorsInFixedPoint[pViewportIndex[6]].xmin,
|
||||
pScissorsInFixedPoint[pViewportIndex[7]].xmin);
|
||||
scisYmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].ymin,
|
||||
pScissorsInFixedPoint[pViewportIndex[1]].ymin,
|
||||
pScissorsInFixedPoint[pViewportIndex[2]].ymin,
|
||||
pScissorsInFixedPoint[pViewportIndex[3]].ymin,
|
||||
pScissorsInFixedPoint[pViewportIndex[4]].ymin,
|
||||
pScissorsInFixedPoint[pViewportIndex[5]].ymin,
|
||||
pScissorsInFixedPoint[pViewportIndex[6]].ymin,
|
||||
pScissorsInFixedPoint[pViewportIndex[7]].ymin);
|
||||
scisXmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].xmax,
|
||||
pScissorsInFixedPoint[pViewportIndex[1]].xmax,
|
||||
pScissorsInFixedPoint[pViewportIndex[2]].xmax,
|
||||
pScissorsInFixedPoint[pViewportIndex[3]].xmax,
|
||||
pScissorsInFixedPoint[pViewportIndex[4]].xmax,
|
||||
pScissorsInFixedPoint[pViewportIndex[5]].xmax,
|
||||
pScissorsInFixedPoint[pViewportIndex[6]].xmax,
|
||||
pScissorsInFixedPoint[pViewportIndex[7]].xmax);
|
||||
scisYmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].ymax,
|
||||
pScissorsInFixedPoint[pViewportIndex[1]].ymax,
|
||||
pScissorsInFixedPoint[pViewportIndex[2]].ymax,
|
||||
pScissorsInFixedPoint[pViewportIndex[3]].ymax,
|
||||
pScissorsInFixedPoint[pViewportIndex[4]].ymax,
|
||||
pScissorsInFixedPoint[pViewportIndex[5]].ymax,
|
||||
pScissorsInFixedPoint[pViewportIndex[6]].ymax,
|
||||
pScissorsInFixedPoint[pViewportIndex[7]].ymax);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief StreamOut - Streams vertex data out to SO buffers.
|
||||
/// Generally, we are only streaming out a SIMDs worth of triangles.
|
||||
|
|
@ -1849,6 +1913,7 @@ void BinTriangles(
|
|||
// compute per tri backface
|
||||
uint32_t frontFaceMask = frontWindingTris;
|
||||
uint32_t *pPrimID = (uint32_t *)&primID;
|
||||
const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
|
||||
DWORD triIndex = 0;
|
||||
// for center sample pattern, all samples are at pixel center; calculate coverage
|
||||
// once at center and broadcast the results in the backend
|
||||
|
|
@ -1944,10 +2009,26 @@ void BinTriangles(
|
|||
}
|
||||
|
||||
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
|
||||
bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
|
||||
bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
|
||||
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
|
||||
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
|
||||
// Gather the AOS effective scissor rects based on the per-prim VP index.
|
||||
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
|
||||
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
|
||||
if (state.gsState.emitsViewportArrayIndex)
|
||||
{
|
||||
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
|
||||
scisXmin, scisYmin, scisXmax, scisYmax);
|
||||
}
|
||||
else // broadcast fast path for non-VPAI case.
|
||||
{
|
||||
scisXmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmin);
|
||||
scisYmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymin);
|
||||
scisXmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmax);
|
||||
scisYmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymax);
|
||||
}
|
||||
|
||||
bbox.xmin = _simd_max_epi32(bbox.xmin, scisXmin);
|
||||
bbox.ymin = _simd_max_epi32(bbox.ymin, scisYmin);
|
||||
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), scisXmax);
|
||||
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), scisYmax);
|
||||
|
||||
if(CT::IsConservativeT::value)
|
||||
{
|
||||
|
|
@ -2044,7 +2125,8 @@ void BinTriangles(
|
|||
desc.triFlags.frontFacing = state.forceFront ? 1 : ((frontFaceMask >> triIndex) & 1);
|
||||
desc.triFlags.primID = pPrimID[triIndex];
|
||||
desc.triFlags.renderTargetArrayIndex = aRTAI[triIndex];
|
||||
|
||||
desc.triFlags.viewportIndex = pViewportIndex[triIndex];
|
||||
|
||||
auto pArena = pDC->pArena;
|
||||
SWR_ASSERT(pArena != nullptr);
|
||||
|
||||
|
|
@ -2130,6 +2212,7 @@ void BinPoints(
|
|||
const SWR_FRONTEND_STATE& feState = state.frontendState;
|
||||
const SWR_GS_STATE& gsState = state.gsState;
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
|
||||
|
||||
// Select attribute processor
|
||||
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
|
||||
|
|
@ -2240,6 +2323,7 @@ void BinPoints(
|
|||
desc.triFlags.frontFacing = 1;
|
||||
desc.triFlags.primID = pPrimID[primIndex];
|
||||
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
|
||||
desc.triFlags.viewportIndex = pViewportIndex[primIndex];
|
||||
|
||||
work.pfnWork = RasterizeSimplePoint;
|
||||
|
||||
|
|
@ -2306,10 +2390,26 @@ void BinPoints(
|
|||
bbox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
|
||||
|
||||
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
|
||||
bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
|
||||
bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
|
||||
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
|
||||
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
|
||||
// Gather the AOS effective scissor rects based on the per-prim VP index.
|
||||
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
|
||||
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
|
||||
if (state.gsState.emitsViewportArrayIndex)
|
||||
{
|
||||
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
|
||||
scisXmin, scisYmin, scisXmax, scisYmax);
|
||||
}
|
||||
else // broadcast fast path for non-VPAI case.
|
||||
{
|
||||
scisXmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmin);
|
||||
scisYmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymin);
|
||||
scisXmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmax);
|
||||
scisYmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymax);
|
||||
}
|
||||
|
||||
bbox.xmin = _simd_max_epi32(bbox.xmin, scisXmin);
|
||||
bbox.ymin = _simd_max_epi32(bbox.ymin, scisYmin);
|
||||
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), scisXmax);
|
||||
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), scisYmax);
|
||||
|
||||
// Cull bloated points completely outside scissor
|
||||
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
|
||||
|
|
@ -2374,6 +2474,7 @@ void BinPoints(
|
|||
desc.triFlags.primID = pPrimID[primIndex];
|
||||
desc.triFlags.pointSize = aPointSize[primIndex];
|
||||
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
|
||||
desc.triFlags.viewportIndex = pViewportIndex[primIndex];
|
||||
|
||||
work.pfnWork = RasterizeTriPoint;
|
||||
|
||||
|
|
@ -2431,6 +2532,7 @@ void BinPoints(
|
|||
/// @param workerId - thread's worker id. Every thread has a unique id.
|
||||
/// @param tri - Contains line position data for a SIMD's worth of lines.
|
||||
/// @param primID - Primitive ID for each line.
|
||||
/// @param viewportIdx - Viewport Array Index for each line.
|
||||
void BinLines(
|
||||
DRAW_CONTEXT *pDC,
|
||||
PA_STATE& pa,
|
||||
|
|
@ -2508,6 +2610,7 @@ void BinLines(
|
|||
primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vZeroLengthMask));
|
||||
|
||||
uint32_t *pPrimID = (uint32_t *)&primID;
|
||||
const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
|
||||
|
||||
simdscalar vUnused = _simd_setzero_ps();
|
||||
|
||||
|
|
@ -2533,10 +2636,24 @@ void BinLines(
|
|||
bbox.ymax = _simd_blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
|
||||
|
||||
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
|
||||
bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
|
||||
bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
|
||||
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
|
||||
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
|
||||
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
|
||||
if (state.gsState.emitsViewportArrayIndex)
|
||||
{
|
||||
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
|
||||
scisXmin, scisYmin, scisXmax, scisYmax);
|
||||
}
|
||||
else // broadcast fast path for non-VPAI case.
|
||||
{
|
||||
scisXmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmin);
|
||||
scisYmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymin);
|
||||
scisXmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmax);
|
||||
scisYmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymax);
|
||||
}
|
||||
|
||||
bbox.xmin = _simd_max_epi32(bbox.xmin, scisXmin);
|
||||
bbox.ymin = _simd_max_epi32(bbox.ymin, scisYmin);
|
||||
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), scisXmax);
|
||||
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), scisYmax);
|
||||
|
||||
// Cull prims completely outside scissor
|
||||
{
|
||||
|
|
@ -2602,6 +2719,7 @@ void BinLines(
|
|||
desc.triFlags.primID = pPrimID[primIndex];
|
||||
desc.triFlags.yMajor = (yMajorMask >> primIndex) & 1;
|
||||
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
|
||||
desc.triFlags.viewportIndex = pViewportIndex[primIndex];
|
||||
|
||||
work.pfnWork = RasterizeLine;
|
||||
|
||||
|
|
|
|||
|
|
@ -967,20 +967,22 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
OSALIGNSIMD(SWR_RECT) bbox;
|
||||
calcBoundingBoxInt(vXi, vYi, bbox);
|
||||
|
||||
const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
|
||||
|
||||
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
|
||||
{
|
||||
// If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
|
||||
bbox.xmin--; bbox.xmax++; bbox.ymin--; bbox.ymax++;
|
||||
SWR_ASSERT(state.scissorInFixedPoint.xmin >= 0 && state.scissorInFixedPoint.ymin >= 0,
|
||||
SWR_ASSERT(scissorInFixedPoint.xmin >= 0 && scissorInFixedPoint.ymin >= 0,
|
||||
"Conservative rast degenerate handling requires a valid scissor rect");
|
||||
}
|
||||
|
||||
// Intersect with scissor/viewport
|
||||
OSALIGNSIMD(SWR_RECT) intersect;
|
||||
intersect.xmin = std::max(bbox.xmin, state.scissorInFixedPoint.xmin);
|
||||
intersect.xmax = std::min(bbox.xmax - 1, state.scissorInFixedPoint.xmax);
|
||||
intersect.ymin = std::max(bbox.ymin, state.scissorInFixedPoint.ymin);
|
||||
intersect.ymax = std::min(bbox.ymax - 1, state.scissorInFixedPoint.ymax);
|
||||
intersect.xmin = std::max(bbox.xmin, scissorInFixedPoint.xmin);
|
||||
intersect.xmax = std::min(bbox.xmax - 1, scissorInFixedPoint.xmax);
|
||||
intersect.ymin = std::max(bbox.ymin, scissorInFixedPoint.ymin);
|
||||
intersect.ymax = std::min(bbox.ymax - 1, scissorInFixedPoint.ymax);
|
||||
|
||||
triDesc.triFlags = workDesc.triFlags;
|
||||
|
||||
|
|
@ -1087,7 +1089,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
|
||||
// Compute and store triangle edge data if scissor needs to be rasterized
|
||||
ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>
|
||||
(bbox, state.scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
|
||||
(bbox, scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
|
||||
|
||||
// Evaluate edge equations at sample positions of each of the 4 corners of a raster tile
|
||||
// used for testing if the entire raster tile is inside a triangle
|
||||
|
|
@ -1573,6 +1575,8 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
|||
int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
|
||||
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
|
||||
|
||||
const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
|
||||
|
||||
// create a copy of the triangle buffer to write our adjusted vertices to
|
||||
OSALIGNSIMD(float) newTriBuffer[4 * 4];
|
||||
TRIANGLE_WORK_DESC newWorkDesc = workDesc;
|
||||
|
|
@ -1667,13 +1671,13 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
|||
calcBoundingBoxInt(vXai, vYai, bboxA);
|
||||
|
||||
if (!(bboxA.xmin > macroBoxRight ||
|
||||
bboxA.xmin > state.scissorInFixedPoint.xmax ||
|
||||
bboxA.xmin > scissorInFixedPoint.xmax ||
|
||||
bboxA.xmax - 1 < macroBoxLeft ||
|
||||
bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
|
||||
bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
|
||||
bboxA.ymin > macroBoxBottom ||
|
||||
bboxA.ymin > state.scissorInFixedPoint.ymax ||
|
||||
bboxA.ymin > scissorInFixedPoint.ymax ||
|
||||
bboxA.ymax - 1 < macroBoxTop ||
|
||||
bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
|
||||
bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
|
||||
// rasterize triangle
|
||||
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
|
||||
}
|
||||
|
|
@ -1740,13 +1744,13 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
|||
calcBoundingBoxInt(vXai, vYai, bboxA);
|
||||
|
||||
if (!(bboxA.xmin > macroBoxRight ||
|
||||
bboxA.xmin > state.scissorInFixedPoint.xmax ||
|
||||
bboxA.xmin > scissorInFixedPoint.xmax ||
|
||||
bboxA.xmax - 1 < macroBoxLeft ||
|
||||
bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
|
||||
bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
|
||||
bboxA.ymin > macroBoxBottom ||
|
||||
bboxA.ymin > state.scissorInFixedPoint.ymax ||
|
||||
bboxA.ymin > scissorInFixedPoint.ymax ||
|
||||
bboxA.ymax - 1 < macroBoxTop ||
|
||||
bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
|
||||
bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
|
||||
// rasterize triangle
|
||||
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue