mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
swr: [rasterizer core] conservative rast degenerate handling
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
This commit is contained in:
parent
f01827a469
commit
9f7d99fcfe
5 changed files with 330 additions and 142 deletions
|
|
@ -109,8 +109,6 @@ template <>
|
|||
struct ConservativeRastFETraits<StandardRastT>
|
||||
{
|
||||
typedef std::false_type IsConservativeT;
|
||||
typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT;
|
||||
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -119,13 +117,7 @@ template <>
|
|||
struct ConservativeRastFETraits<ConservativeRastT>
|
||||
{
|
||||
typedef std::true_type IsConservativeT;
|
||||
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;
|
||||
|
||||
/// Conservative bounding box needs to expand the area around each vertex by 1/512, which
|
||||
/// is the potential snapping error when going from FP-> 16.8 fixed
|
||||
typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT;
|
||||
typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;
|
||||
typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
|
|
@ -1446,7 +1446,7 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
|
|||
/// @param pLinkageMap - maps VS attribute slot to PS slot
|
||||
/// @param triIndex - Triangle to process attributes for
|
||||
/// @param pBuffer - Output result
|
||||
template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT>
|
||||
template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT, typename IsDegenerate>
|
||||
INLINE void ProcessAttributes(
|
||||
DRAW_CONTEXT *pDC,
|
||||
PA_STATE&pa,
|
||||
|
|
@ -1456,7 +1456,8 @@ INLINE void ProcessAttributes(
|
|||
{
|
||||
static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
|
||||
const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
|
||||
LONG constantInterpMask = backendState.constantInterpolationMask;
|
||||
// Conservative Rasterization requires degenerate tris to have constant attribute interpolation
|
||||
LONG constantInterpMask = IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
|
||||
const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
|
||||
const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology;
|
||||
|
||||
|
|
@ -1483,7 +1484,7 @@ INLINE void ProcessAttributes(
|
|||
__m128 attrib[3]; // triangle attribs (always 4 wide)
|
||||
float* pAttribStart = pBuffer;
|
||||
|
||||
if (HasConstantInterpT::value)
|
||||
if (HasConstantInterpT::value || IsDegenerate::value)
|
||||
{
|
||||
if (_bittest(&constantInterpMask, i))
|
||||
{
|
||||
|
|
@ -1605,9 +1606,9 @@ struct ProcessAttributesChooser
|
|||
}
|
||||
};
|
||||
|
||||
PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp)
|
||||
PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp, bool IsDegenerate = false)
|
||||
{
|
||||
return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp);
|
||||
return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -1668,38 +1669,19 @@ INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn)
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Helper function to set the X,Y coords of a triangle to the
|
||||
/// requested Fixed Point precision from FP32. If the RequestedT
|
||||
/// FixedPointTraits precision is the same as the CurrentT, no extra
|
||||
/// conversions will be done. If they are different, convert from FP32
|
||||
/// to the Requested precision and set vXi, vYi
|
||||
/// @tparam RequestedT: requested FixedPointTraits type
|
||||
/// @tparam CurrentT: FixedPointTraits type of the last
|
||||
template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>>
|
||||
struct FPToFixedPoint
|
||||
/// requested Fixed Point precision from FP32.
|
||||
/// @param tri: simdvector[3] of FP triangle verts
|
||||
/// @param vXi: fixed point X coords of tri verts
|
||||
/// @param vYi: fixed point Y coords of tri verts
|
||||
INLINE static void FPToFixedPoint(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @param tri: simdvector[3] of FP triangle verts
|
||||
/// @param vXi: fixed point X coords of tri verts
|
||||
/// @param vYi: fixed point Y coords of tri verts
|
||||
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
|
||||
{
|
||||
vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x);
|
||||
vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y);
|
||||
vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x);
|
||||
vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y);
|
||||
vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x);
|
||||
vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y);
|
||||
};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief In the case where the RequestedT and CurrentT fixed point
|
||||
/// precisions are the same, do nothing.
|
||||
template<typename RequestedT>
|
||||
struct FPToFixedPoint<RequestedT, RequestedT>
|
||||
{
|
||||
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){};
|
||||
};
|
||||
vXi[0] = fpToFixedPointVertical(tri[0].x);
|
||||
vYi[0] = fpToFixedPointVertical(tri[0].y);
|
||||
vXi[1] = fpToFixedPointVertical(tri[1].x);
|
||||
vYi[1] = fpToFixedPointVertical(tri[1].y);
|
||||
vXi[2] = fpToFixedPointVertical(tri[2].x);
|
||||
vYi[2] = fpToFixedPointVertical(tri[2].y);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Calculate bounding box for current triangle
|
||||
|
|
@ -1710,20 +1692,8 @@ struct FPToFixedPoint<RequestedT, RequestedT>
|
|||
/// *Note*: expects vX, vY to be in the correct precision for the type
|
||||
/// of rasterization. This avoids unnecessary FP->fixed conversions.
|
||||
template <typename CT>
|
||||
INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical
|
||||
template <>
|
||||
INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
|
||||
INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
|
||||
{
|
||||
// FE conservative rast traits
|
||||
typedef FEStandardRastT CT;
|
||||
|
||||
static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision");
|
||||
// Update vXi, vYi fixed point precision for BBox calculation if necessary
|
||||
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
|
||||
|
||||
simdscalari vMinX = vX[0];
|
||||
vMinX = _simd_min_epi32(vMinX, vX[1]);
|
||||
vMinX = _simd_min_epi32(vMinX, vX[2]);
|
||||
|
|
@ -1755,10 +1725,6 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
|
|||
// FE conservative rast traits
|
||||
typedef FEConservativeRastT CT;
|
||||
|
||||
static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision");
|
||||
// Update vXi, vYi fixed point precision for BBox calculation if necessary
|
||||
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
|
||||
|
||||
simdscalari vMinX = vX[0];
|
||||
vMinX = _simd_min_epi32(vMinX, vX[1]);
|
||||
vMinX = _simd_min_epi32(vMinX, vX[2]);
|
||||
|
|
@ -1776,10 +1742,11 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
|
|||
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
|
||||
|
||||
/// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
|
||||
bbox.left = _simd_srli_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
bbox.right = _simd_srli_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
bbox.top = _simd_srli_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
bbox.bottom = _simd_srli_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
/// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
|
||||
bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
|
||||
bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
|
||||
bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
|
||||
bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -1808,10 +1775,6 @@ void BinTriangles(
|
|||
const SWR_GS_STATE& gsState = state.gsState;
|
||||
MacroTileMgr *pTileMgr = pDC->pTileMgr;
|
||||
|
||||
// Select attribute processor
|
||||
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
|
||||
state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
|
||||
|
||||
|
||||
simdscalar vRecipW0 = _simd_set1_ps(1.0f);
|
||||
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
|
||||
|
|
@ -1852,8 +1815,8 @@ void BinTriangles(
|
|||
tri[2].y = _simd_add_ps(tri[2].y, offset);
|
||||
|
||||
simdscalari vXi[3], vYi[3];
|
||||
// Set vXi, vYi to fixed point precision required for degenerate triangle check
|
||||
FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi);
|
||||
// Set vXi, vYi to required fixed point precision
|
||||
FPToFixedPoint(tri, vXi, vYi);
|
||||
|
||||
// triangle setup
|
||||
simdscalari vAi[3], vBi[3];
|
||||
|
|
@ -1863,8 +1826,6 @@ void BinTriangles(
|
|||
simdscalari vDet[2];
|
||||
calcDeterminantIntVertical(vAi, vBi, vDet);
|
||||
|
||||
/// todo: handle degen tri's for Conservative Rast.
|
||||
|
||||
// cull zero area
|
||||
int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si())));
|
||||
int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si())));
|
||||
|
|
@ -1872,11 +1833,15 @@ void BinTriangles(
|
|||
int cullZeroAreaMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH / 2));
|
||||
|
||||
uint32_t origTriMask = triMask;
|
||||
triMask &= ~cullZeroAreaMask;
|
||||
// don't cull degenerate triangles if we're conservatively rasterizing
|
||||
if(!CT::IsConservativeT::value)
|
||||
{
|
||||
triMask &= ~cullZeroAreaMask;
|
||||
}
|
||||
|
||||
// determine front winding tris
|
||||
// CW +det
|
||||
// CCW -det
|
||||
// CCW det <= 0; 0 area triangles are marked as backfacing, which is required behavior for conservative rast
|
||||
maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[0], _simd_setzero_si())));
|
||||
maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[1], _simd_setzero_si())));
|
||||
int cwTriMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH /2) );
|
||||
|
|
@ -1898,6 +1863,7 @@ void BinTriangles(
|
|||
case SWR_CULLMODE_BOTH: cullTris = 0xffffffff; break;
|
||||
case SWR_CULLMODE_NONE: cullTris = 0x0; break;
|
||||
case SWR_CULLMODE_FRONT: cullTris = frontWindingTris; break;
|
||||
// 0 area triangles are marked as backfacing, which is required behavior for conservative rast
|
||||
case SWR_CULLMODE_BACK: cullTris = ~frontWindingTris; break;
|
||||
default: SWR_ASSERT(false, "Invalid cull mode: %d", rastState.cullMode); cullTris = 0x0; break;
|
||||
}
|
||||
|
|
@ -1916,9 +1882,53 @@ void BinTriangles(
|
|||
DWORD triIndex = 0;
|
||||
// for center sample pattern, all samples are at pixel center; calculate coverage
|
||||
// once at center and broadcast the results in the backend
|
||||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
|
||||
PFN_WORK_FUNC pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
|
||||
pDC->pState->state.psState.inputCoverage, (rastState.scissorEnable > 0));
|
||||
const SWR_MULTISAMPLE_COUNT sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
|
||||
uint32_t edgeEnable;
|
||||
PFN_WORK_FUNC pfnWork;
|
||||
if(CT::IsConservativeT::value)
|
||||
{
|
||||
// determine which edges of the degenerate tri, if any, are valid to rasterize.
|
||||
// used to call the appropriate templated rasterizer function
|
||||
if(cullZeroAreaMask > 0)
|
||||
{
|
||||
// e0 = v1-v0
|
||||
simdscalari x0x1Mask = _simd_cmpeq_epi32(vXi[0], vXi[1]);
|
||||
simdscalari y0y1Mask = _simd_cmpeq_epi32(vYi[0], vYi[1]);
|
||||
uint32_t e0Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x0x1Mask, y0y1Mask)));
|
||||
|
||||
// e1 = v2-v1
|
||||
simdscalari x1x2Mask = _simd_cmpeq_epi32(vXi[1], vXi[2]);
|
||||
simdscalari y1y2Mask = _simd_cmpeq_epi32(vYi[1], vYi[2]);
|
||||
uint32_t e1Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x1x2Mask, y1y2Mask)));
|
||||
|
||||
// e2 = v0-v2
|
||||
// if v0 == v1 & v1 == v2, v0 == v2
|
||||
uint32_t e2Mask = e0Mask & e1Mask;
|
||||
SWR_ASSERT(KNOB_SIMD_WIDTH == 8, "Need to update degenerate mask code for avx512");
|
||||
|
||||
// edge order: e0 = v0v1, e1 = v1v2, e2 = v0v2
|
||||
// 32 bit binary: 0000 0000 0010 0100 1001 0010 0100 1001
|
||||
e0Mask = pdep_u32(e0Mask, 0x00249249);
|
||||
// 32 bit binary: 0000 0000 0100 1001 0010 0100 1001 0010
|
||||
e1Mask = pdep_u32(e1Mask, 0x00492492);
|
||||
// 32 bit binary: 0000 0000 1001 0010 0100 1001 0010 0100
|
||||
e2Mask = pdep_u32(e2Mask, 0x00924924);
|
||||
|
||||
edgeEnable = (0x00FFFFFF & (~(e0Mask | e1Mask | e2Mask)));
|
||||
}
|
||||
else
|
||||
{
|
||||
edgeEnable = 0x00FFFFFF;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// degenerate triangles won't be sent to rasterizer; just enable all edges
|
||||
pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
|
||||
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
|
||||
(rastState.scissorEnable > 0));
|
||||
}
|
||||
|
||||
if (!triMask)
|
||||
{
|
||||
goto endBinTriangles;
|
||||
|
|
@ -1969,6 +1979,16 @@ void BinTriangles(
|
|||
bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
|
||||
bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
|
||||
|
||||
if(CT::IsConservativeT::value)
|
||||
{
|
||||
// in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
|
||||
// some area. Bump the right/bottom edges out
|
||||
simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom);
|
||||
bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom);
|
||||
simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right);
|
||||
bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight);
|
||||
}
|
||||
|
||||
// Cull tris completely outside scissor
|
||||
{
|
||||
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
|
||||
|
|
@ -2026,7 +2046,28 @@ void BinTriangles(
|
|||
|
||||
BE_WORK work;
|
||||
work.type = DRAW;
|
||||
work.pfnWork = pfnWork;
|
||||
|
||||
bool isDegenerate;
|
||||
if(CT::IsConservativeT::value)
|
||||
{
|
||||
// only rasterize valid edges if we have a degenerate primitive
|
||||
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
|
||||
work.pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
|
||||
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
|
||||
(rastState.scissorEnable > 0));
|
||||
|
||||
// Degenerate triangles are required to be constant interpolated
|
||||
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
|
||||
}
|
||||
else
|
||||
{
|
||||
isDegenerate = false;
|
||||
work.pfnWork = pfnWork;
|
||||
}
|
||||
|
||||
// Select attribute processor
|
||||
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
|
||||
state.backendState.swizzleEnable, state.backendState.constantInterpolationMask, isDegenerate);
|
||||
|
||||
TRIANGLE_WORK_DESC &desc = work.desc.tri;
|
||||
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ struct EDGE
|
|||
/// @param vA, vB - A & B coefs for each edge of the triangle (Ax + Bx + C)
|
||||
/// @param vStepQuad0-2 - edge equations evaluated at the UL corners of the 2x2 pixel quad.
|
||||
/// Used to step between quads when sweeping over the raster tile.
|
||||
template<uint32_t NumEdges>
|
||||
template<uint32_t NumEdges, typename EdgeMaskT>
|
||||
INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdges], EDGE *pRastEdges)
|
||||
{
|
||||
uint64_t coverageMask = 0;
|
||||
|
|
@ -120,25 +120,25 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
|
|||
|
||||
// evaluate which pixels in the quad are covered
|
||||
#define EVAL \
|
||||
UnrollerL<0, NumEdges, 1>::step(eval_lambda);
|
||||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);
|
||||
|
||||
// update coverage mask
|
||||
#define UPDATE_MASK(bit) \
|
||||
mask = edgeMask[0]; \
|
||||
UnrollerL<1, NumEdges, 1>::step(update_lambda); \
|
||||
UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
|
||||
coverageMask |= (mask << bit);
|
||||
|
||||
// step in the +x direction to the next quad
|
||||
#define INCX \
|
||||
UnrollerL<0, NumEdges, 1>::step(incx_lambda);
|
||||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);
|
||||
|
||||
// step in the +y direction to the next quad
|
||||
#define INCY \
|
||||
UnrollerL<0, NumEdges, 1>::step(incy_lambda);
|
||||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);
|
||||
|
||||
// step in the -x direction to the next quad
|
||||
#define DECX \
|
||||
UnrollerL<0, NumEdges, 1>::step(decx_lambda);
|
||||
UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);
|
||||
|
||||
// sweep 2x2 quad back and forth through the raster tile,
|
||||
// computing coverage masks for the entire tile
|
||||
|
|
@ -274,6 +274,17 @@ INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256
|
|||
vEdge = _mm256_blendv_pd(vEdgeOut, vEdgeAdjust, gMaskToVecpd[msk | msk2]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief calculates difference in precision between the result of manh
|
||||
/// calculation and the edge precision, based on compile time trait values
|
||||
template<typename RT>
|
||||
constexpr int64_t ManhToEdgePrecisionAdjust()
|
||||
{
|
||||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
|
||||
"Inadequate precision of result of manh calculation ");
|
||||
return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct adjustEdgeConservative
|
||||
/// @brief Primary template definition used for partially specializing
|
||||
|
|
@ -306,15 +317,15 @@ struct adjustEdgeConservative<RT, std::true_type>
|
|||
/// instead of having to test individual pixel corners for conservative coverage
|
||||
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
|
||||
{
|
||||
/// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
|
||||
/// from the pixel center (in the direction of the edge normal A/B)
|
||||
// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
|
||||
// from the pixel center (in the direction of the edge normal A/B)
|
||||
|
||||
/// edge = Ax + Bx + C - (manh/e)
|
||||
/// manh = manhattan distance = abs(A) + abs(B)
|
||||
/// e = absolute rounding error from snapping from float to fixed point precision
|
||||
// edge = Ax + Bx + C - (manh/e)
|
||||
// manh = manhattan distance = abs(A) + abs(B)
|
||||
// e = absolute rounding error from snapping from float to fixed point precision
|
||||
|
||||
/// 'fixed point' multiply (in double to be avx1 friendly)
|
||||
/// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
|
||||
// 'fixed point' multiply (in double to be avx1 friendly)
|
||||
// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
|
||||
__m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
|
||||
__m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)),
|
||||
_mm256_mul_pd(vBai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)));
|
||||
|
|
@ -322,15 +333,13 @@ struct adjustEdgeConservative<RT, std::true_type>
|
|||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
|
||||
"Inadequate precision of result of manh calculation ");
|
||||
|
||||
/// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
|
||||
/// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
|
||||
manh = _mm256_mul_pd(manh, _mm256_set1_pd(((RT::PrecisionT::BitsT::value +
|
||||
RT::ConservativePrecisionT::BitsT::value) -
|
||||
RT::EdgePrecisionT::BitsT::value) * 0.5));
|
||||
// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
|
||||
// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
|
||||
manh = _mm256_mul_pd(manh, _mm256_set1_pd(ManhToEdgePrecisionAdjust<RT>() * 0.5));
|
||||
|
||||
/// move the edge away from the pixel center by the required conservative precision + 1/2 pixel
|
||||
/// this allows the rasterizer to do a single conservative coverage test to see if the primitive
|
||||
/// intersects the pixel at all
|
||||
// move the edge away from the pixel center by the required conservative precision + 1/2 pixel
|
||||
// this allows the rasterizer to do a single conservative coverage test to see if the primitive
|
||||
// intersects the pixel at all
|
||||
vEdge = _mm256_sub_pd(vEdge, manh);
|
||||
};
|
||||
};
|
||||
|
|
@ -346,6 +355,19 @@ struct adjustEdgeConservative<RT, std::false_type>
|
|||
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge){};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief calculates the distance a degenerate BBox needs to be adjusted
|
||||
/// for conservative rast based on compile time trait values
|
||||
template<typename RT>
|
||||
constexpr int64_t ConservativeScissorOffset()
|
||||
{
|
||||
static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0, "Rasterizer precision > conservative precision");
|
||||
// if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox when calculating scissor edges
|
||||
typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1> DegenerateEdgeOffsetT;
|
||||
// 1/2 pixel edge offset + conservative offset - degenerateTriangle
|
||||
return RT::ConservativeEdgeOffsetT::value - (DegenerateEdgeOffsetT::value << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs calculations to adjust each a scalar edge out
|
||||
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
|
||||
|
|
@ -354,13 +376,7 @@ template <typename RT>
|
|||
INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge)
|
||||
{
|
||||
int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b));
|
||||
|
||||
int64_t manh = ((aabs * RT::ConservativeEdgeOffsetT::value) + (babs * RT::ConservativeEdgeOffsetT::value)) >>
|
||||
((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
|
||||
|
||||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
|
||||
"Inadequate precision of result of manh calculation ");
|
||||
|
||||
int64_t manh = ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >> ManhToEdgePrecisionAdjust<RT>();
|
||||
vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh));
|
||||
};
|
||||
|
||||
|
|
@ -371,7 +387,7 @@ INLINE void adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vE
|
|||
{
|
||||
static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
|
||||
"Edge equation expected to be in x.16 fixed point");
|
||||
/// need to offset the edge before applying the top-left rule
|
||||
// need to offset the edge before applying the top-left rule
|
||||
adjustEdgeConservative<RT, typename RT::IsConservativeT>(vAi, vBi, vEdge);
|
||||
|
||||
adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
|
||||
|
|
@ -563,14 +579,13 @@ struct ComputeScissorEdges
|
|||
template <typename RT>
|
||||
struct ComputeScissorEdges<std::true_type, std::true_type, RT>
|
||||
{
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
|
||||
/// evaluate edge equations and offset them away from pixel center.
|
||||
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
|
||||
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
|
||||
{
|
||||
/// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
|
||||
// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
|
||||
BBOX scissor;
|
||||
scissor.left = std::max(triBBox.left, scissorBBox.left);
|
||||
scissor.right = std::min(triBBox.right, scissorBBox.right);
|
||||
|
|
@ -593,7 +608,7 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
|
|||
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
|
||||
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
|
||||
|
||||
/// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
|
||||
// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
|
||||
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
|
||||
adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]);
|
||||
adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]);
|
||||
|
|
@ -632,6 +647,81 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
|
|||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Primary function template for TrivialRejectTest. Should
|
||||
/// never be called, but TemplateUnroller instantiates a few unused values,
|
||||
/// so it calls a runtime assert instead of a static_assert.
|
||||
template <typename ValidEdgeMaskT>
|
||||
INLINE bool TrivialRejectTest(const int, const int, const int)
|
||||
{
|
||||
SWR_ASSERT(0, "Primary templated function should never be called");
|
||||
return false;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief E0E1ValidT specialization of TrivialRejectTest. Tests edge 0
|
||||
/// and edge 1 for trivial coverage reject
|
||||
template <>
|
||||
INLINE bool TrivialRejectTest<E0E1ValidT>(const int mask0, const int mask1, const int)
|
||||
{
|
||||
return (!(mask0 && mask1)) ? true : false;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief E0E2ValidT specialization of TrivialRejectTest. Tests edge 0
|
||||
/// and edge 2 for trivial coverage reject
|
||||
template <>
|
||||
INLINE bool TrivialRejectTest<E0E2ValidT>(const int mask0, const int, const int mask2)
|
||||
{
|
||||
return (!(mask0 && mask2)) ? true : false;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief E1E2ValidT specialization of TrivialRejectTest. Tests edge 1
|
||||
/// and edge 2 for trivial coverage reject
|
||||
template <>
|
||||
INLINE bool TrivialRejectTest<E1E2ValidT>(const int, const int mask1, const int mask2)
|
||||
{
|
||||
return (!(mask1 && mask2)) ? true : false;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief AllEdgesValidT specialization of TrivialRejectTest. Tests all
|
||||
/// primitive edges for trivial coverage reject
|
||||
template <>
|
||||
INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
|
||||
{
|
||||
return (!(mask0 && mask1 && mask2)) ? true : false;;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief NoEdgesValidT specialization of TrivialRejectTest. Degenerate
|
||||
/// point, so return false and rasterize against conservative BBox
|
||||
template <>
|
||||
INLINE bool TrivialRejectTest<NoEdgesValidT>(const int, const int, const int)
|
||||
{
|
||||
return false;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Primary function template for TrivialAcceptTest. Always returns
|
||||
/// false, since it will only be called for degenerate tris, and as such
|
||||
/// will never cover the entire raster tile
|
||||
template <typename ValidEdgeMaskT>
|
||||
INLINE bool TrivialAcceptTest(const int, const int, const int)
|
||||
{
|
||||
return false;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief AllEdgesValidT specialization for TrivialAcceptTest. Test all
|
||||
/// edge masks for a fully covered raster tile
|
||||
template <>
|
||||
INLINE bool TrivialAcceptTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
|
||||
{
|
||||
return ((mask0 & mask1 & mask2) == 0xf);
|
||||
};
|
||||
|
||||
template <typename RT>
|
||||
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
|
||||
{
|
||||
|
|
@ -681,8 +771,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
// determinant
|
||||
float det = calcDeterminantInt(vAi, vBi);
|
||||
|
||||
/// Verts in Pixel Coordinate Space at this point
|
||||
/// Det > 0 = CW winding order
|
||||
// Verts in Pixel Coordinate Space at this point
|
||||
// Det > 0 = CW winding order
|
||||
// Convert CW triangles to CCW
|
||||
if (det > 0.0)
|
||||
{
|
||||
|
|
@ -693,28 +783,39 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
det = -det;
|
||||
}
|
||||
|
||||
/// @todo: handle degenerates for ConservativeRast
|
||||
|
||||
__m128 vC;
|
||||
// Finish triangle setup - C edge coef
|
||||
triangleSetupC(vX, vY, vA, vB, vC);
|
||||
|
||||
// compute barycentric i and j
|
||||
// i = (A1x + B1y + C1)/det
|
||||
// j = (A2x + B2y + C2)/det
|
||||
__m128 vDet = _mm_set1_ps(det);
|
||||
__m128 vRecipDet = _mm_div_ps(_mm_set1_ps(1.0f), vDet);//_mm_rcp_ps(vDet);
|
||||
_mm_store_ss(&triDesc.recipDet, vRecipDet);
|
||||
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
|
||||
{
|
||||
// If we have degenerate edge(s) to rasterize, set I and J coefs
|
||||
// to 0 for constant interpolation of attributes
|
||||
triDesc.I[0] = 0.0f;
|
||||
triDesc.I[1] = 0.0f;
|
||||
triDesc.I[2] = 0.0f;
|
||||
triDesc.J[0] = 0.0f;
|
||||
triDesc.J[1] = 0.0f;
|
||||
triDesc.J[2] = 0.0f;
|
||||
|
||||
// only extract coefs for 2 of the barycentrics; the 3rd can be
|
||||
// determined from the barycentric equation:
|
||||
// i + j + k = 1 <=> k = 1 - j - i
|
||||
_MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
|
||||
_MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1);
|
||||
_MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1);
|
||||
_MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2);
|
||||
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
|
||||
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
|
||||
// Degenerate triangles have no area
|
||||
triDesc.recipDet = 0.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
// only extract coefs for 2 of the barycentrics; the 3rd can be
|
||||
// determined from the barycentric equation:
|
||||
// i + j + k = 1 <=> k = 1 - j - i
|
||||
_MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
|
||||
_MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1);
|
||||
_MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1);
|
||||
_MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2);
|
||||
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
|
||||
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
|
||||
|
||||
// compute recipDet, used to calculate barycentric i and j in the backend
|
||||
triDesc.recipDet = 1.0f/det;
|
||||
}
|
||||
|
||||
OSALIGNSIMD(float) oneOverW[4];
|
||||
_mm_store_ps(oneOverW, vRecipW);
|
||||
|
|
@ -764,6 +865,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
OSALIGNSIMD(BBOX) bbox;
|
||||
calcBoundingBoxInt(vXi, vYi, bbox);
|
||||
|
||||
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
|
||||
{
|
||||
// If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
|
||||
bbox.left--; bbox.right++; bbox.top--; bbox.bottom++;
|
||||
SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0,
|
||||
"Conservative rast degenerate handling requires a valid scissor rect");
|
||||
}
|
||||
|
||||
// Intersect with scissor/viewport
|
||||
OSALIGNSIMD(BBOX) intersect;
|
||||
intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left);
|
||||
|
|
@ -941,13 +1050,13 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
for (uint32_t sampleNum = 0; sampleNum < NumRasterSamplesT::value; sampleNum++)
|
||||
{
|
||||
// trivial reject, at least one edge has all 4 corners of raster tile outside
|
||||
bool trivialReject = (!(mask0 && mask1 && mask2)) ? true : false;
|
||||
bool trivialReject = TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);
|
||||
|
||||
if (!trivialReject)
|
||||
{
|
||||
// trivial accept mask
|
||||
triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL;
|
||||
if ((mask0 & mask1 & mask2) == 0xf)
|
||||
if (TrivialAcceptTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2))
|
||||
{
|
||||
triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum];
|
||||
// trivial accept, all 4 corners of all 3 edges are negative
|
||||
|
|
@ -991,7 +1100,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
|
||||
// not trivial accept or reject, must rasterize full tile
|
||||
RDTSC_START(BERasterizePartial);
|
||||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value>(pDC, startQuadEdges, rastEdges);
|
||||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
|
||||
RDTSC_STOP(BERasterizePartial, 0, 0);
|
||||
|
||||
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
|
||||
|
|
@ -1101,7 +1210,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
|
|||
// once at center and broadcast the results in the backend
|
||||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
|
||||
// conservative rast not supported for points/lines
|
||||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
|
||||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0));
|
||||
|
||||
// overwrite texcoords for point sprites
|
||||
if (isPointSpriteTexCoordEnabled)
|
||||
|
|
@ -1429,7 +1538,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
|||
PFN_WORK_FUNC pfnTriRast;
|
||||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
|
||||
// conservative rast not supported for points/lines
|
||||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
|
||||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0));
|
||||
|
||||
// make sure this macrotile intersects the triangle
|
||||
__m128i vXai = fpToFixedPoint(vXa);
|
||||
|
|
@ -1541,6 +1650,7 @@ PFN_WORK_FUNC GetRasterizerFunc(
|
|||
uint32_t numSamples,
|
||||
bool IsConservative,
|
||||
uint32_t InputCoverage,
|
||||
uint32_t EdgeEnable,
|
||||
bool RasterizeScissorEdges
|
||||
)
|
||||
{
|
||||
|
|
@ -1548,5 +1658,6 @@ PFN_WORK_FUNC GetRasterizerFunc(
|
|||
IntArg<SWR_MULTISAMPLE_1X,SWR_MULTISAMPLE_TYPE_COUNT-1>{numSamples},
|
||||
IsConservative,
|
||||
IntArg<SWR_INPUT_COVERAGE_NONE, SWR_INPUT_COVERAGE_COUNT-1>{InputCoverage},
|
||||
IntArg<0, VALID_TRI_EDGE_COUNT-1>{EdgeEnable},
|
||||
RasterizeScissorEdges);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,8 +48,28 @@ PFN_WORK_FUNC GetRasterizerFunc(
|
|||
uint32_t numSamples,
|
||||
bool IsConservative,
|
||||
uint32_t InputCoverage,
|
||||
uint32_t EdgeEnable,
|
||||
bool RasterizeScissorEdges);
|
||||
|
||||
enum ValidTriEdges
|
||||
{
|
||||
NO_VALID_EDGES = 0,
|
||||
E0_E1_VALID = 0x3,
|
||||
E0_E2_VALID = 0x5,
|
||||
E1_E2_VALID = 0x6,
|
||||
ALL_EDGES_VALID = 0x7,
|
||||
VALID_TRI_EDGE_COUNT,
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ValidTriEdges convenience typedefs used for templated function
|
||||
/// specialization supported Fixed Point precisions
|
||||
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT;
|
||||
typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT;
|
||||
typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT;
|
||||
typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT;
|
||||
typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct RasterScissorEdgesT
|
||||
/// @brief Primary RasterScissorEdgesT templated struct that holds compile
|
||||
|
|
@ -59,22 +79,26 @@ PFN_WORK_FUNC GetRasterizerFunc(
|
|||
/// 3 triangle edges + 4 scissor edges for coverage.
|
||||
/// @tparam RasterScissorEdgesT: number of multisamples
|
||||
/// @tparam ConservativeT: is this a conservative rasterization
|
||||
template <typename RasterScissorEdgesT, typename ConservativeT>
|
||||
/// @tparam EdgeMaskT: Which edges are valid(not degenerate)
|
||||
template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT>
|
||||
struct RasterEdgeTraits
|
||||
{
|
||||
typedef std::true_type RasterizeScissorEdgesT;
|
||||
typedef std::integral_constant<uint32_t, 7> NumEdgesT;
|
||||
typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief specialization of RasterEdgeTraits. If neither scissor rect
|
||||
/// nor conservative rast is enabled, only test 3 triangle edges
|
||||
/// for coverage
|
||||
template <>
|
||||
struct RasterEdgeTraits<std::false_type, std::false_type>
|
||||
template <typename EdgeMaskT>
|
||||
struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT>
|
||||
{
|
||||
typedef std::false_type RasterizeScissorEdgesT;
|
||||
typedef std::integral_constant<uint32_t, 3> NumEdgesT;
|
||||
// no need for degenerate edge masking in non-conservative case; rasterize all triangle edges
|
||||
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -86,19 +110,19 @@ struct RasterEdgeTraits<std::false_type, std::false_type>
|
|||
/// @tparam InputCoverageT: what type of input coverage is the PS expecting?
|
||||
/// (only used with conservative rasterization)
|
||||
/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
|
||||
template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename RasterScissorEdgesT>
|
||||
template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename EdgeEnableT, typename RasterScissorEdgesT>
|
||||
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
|
||||
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT>
|
||||
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
|
||||
{
|
||||
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value)> MT;
|
||||
|
||||
|
||||
/// Fixed point precision the rasterizer is using
|
||||
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
|
||||
/// Fixed point precision of the edge tests used during rasterization
|
||||
typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT;
|
||||
|
||||
// If conservative rast is enabled, only need a single sample coverage test, with the result copied to all samples
|
||||
typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT;
|
||||
typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT;
|
||||
|
||||
static_assert(EdgePrecisionT::BitsT::value >= ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value,
|
||||
"Rasterizer edge fixed point precision < required conservative rast precision");
|
||||
|
|
|
|||
|
|
@ -831,6 +831,26 @@ struct UnrollerL<End, End, Step> {
|
|||
}
|
||||
};
|
||||
|
||||
// helper function to unroll loops, with mask to skip specific iterations
|
||||
template<int Begin, int End, int Step = 1, int Mask = 0x7f>
|
||||
struct UnrollerLMask {
|
||||
template<typename Lambda>
|
||||
INLINE static void step(Lambda& func) {
|
||||
if(Mask & (1 << Begin))
|
||||
{
|
||||
func(Begin);
|
||||
}
|
||||
UnrollerL<Begin + Step, End, Step>::step(func);
|
||||
}
|
||||
};
|
||||
|
||||
template<int End, int Step, int Mask>
|
||||
struct UnrollerLMask<End, End, Step, Mask> {
|
||||
template<typename Lambda>
|
||||
static void step(Lambda& func) {
|
||||
}
|
||||
};
|
||||
|
||||
// general CRC compute
|
||||
INLINE
|
||||
uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue