swr/rast: Fix read-back of viewport array index

Binner/clipper read viewport array index from the vertex header as needed.
Move viewport state to BACKEND_STATE.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-06-09 18:37:27 -05:00
parent 9b448da60f
commit a6237e4b7f
10 changed files with 182 additions and 117 deletions

View file

@ -680,7 +680,7 @@ void SwrSetBlendFunc(
// update guardband multipliers for the viewport
void updateGuardbands(API_STATE *pState)
{
uint32_t numGbs = pState->backendState.readRenderTargetArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
uint32_t numGbs = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
for(uint32_t i = 0; i < numGbs; ++i)
{
@ -736,7 +736,7 @@ void SwrSetScissorRects(
void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
{
API_STATE *pState = &pDC->pState->state;
uint32_t numScissors = pState->gsState.emitsViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
uint32_t numScissors = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
pState->scissorsTileAligned = true;
for (uint32_t index = 0; index < numScissors; ++index)

View file

@ -434,8 +434,7 @@ void BinTriangles(
uint32_t workerId,
simdvector tri[3],
uint32_t triMask,
simdscalari primID,
simdscalari viewportIdx)
simdscalari primID)
{
SWR_CONTEXT *pContext = pDC->pContext;
@ -451,6 +450,21 @@ void BinTriangles(
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
simdscalar vRecipW2 = _simd_set1_ps(1.0f);
// Read viewport array index if needed
simdscalari viewportIdx = _simd_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simdvector vpiAttrib[3];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
// OOB indices => forced to zero.
simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd_and_si(vClearMask, vpai);
}
if (feState.vpTransformDisable)
{
// RHW is passed in directly when VP transform is disabled
@ -478,7 +492,7 @@ void BinTriangles(
tri[2].v[2] = _simd_mul_ps(tri[2].v[2], vRecipW2);
// Viewport transform to screen space coords
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
}
@ -661,7 +675,7 @@ void BinTriangles(
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@ -863,8 +877,7 @@ void SIMDAPI BinTriangles_simd16(
uint32_t workerId,
simd16vector tri[3],
uint32_t triMask,
simd16scalari primID,
simd16scalari viewportIdx)
simd16scalari primID)
{
SWR_CONTEXT *pContext = pDC->pContext;
@ -880,6 +893,20 @@ void SIMDAPI BinTriangles_simd16(
simd16scalar vRecipW0 = _simd16_set1_ps(1.0f);
simd16scalar vRecipW1 = _simd16_set1_ps(1.0f);
simd16scalar vRecipW2 = _simd16_set1_ps(1.0f);
simd16scalari viewportIdx = _simd16_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simd16vector vpiAttrib[3];
pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
// OOB indices => forced to zero.
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd16_and_si(vClearMask, vpai);
}
if (feState.vpTransformDisable)
{
@ -908,7 +935,7 @@ void SIMDAPI BinTriangles_simd16(
tri[2].v[2] = _simd16_mul_ps(tri[2].v[2], vRecipW2);
// Viewport transform to screen space coords
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
}
@ -1101,7 +1128,7 @@ void SIMDAPI BinTriangles_simd16(
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@ -1524,7 +1551,7 @@ void BinPostSetupPoints(
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@ -1672,8 +1699,7 @@ void BinPoints(
uint32_t workerId,
simdvector prim[3],
uint32_t primMask,
simdscalari primID,
simdscalari viewportIdx)
simdscalari primID)
{
simdvector& primVerts = prim[0];
@ -1681,6 +1707,21 @@ void BinPoints(
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_RASTSTATE& rastState = state.rastState;
// Read back viewport index if required
simdscalari viewportIdx = _simd_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simdvector vpiAttrib[1];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB indices => forced to zero.
vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd_and_si(vClearMask, vpai);
}
if (!feState.vpTransformDisable)
{
// perspective divide
@ -1690,7 +1731,7 @@ void BinPoints(
primVerts.z = _simd_mul_ps(primVerts.z, vRecipW0);
// viewport transform to screen coords
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
}
@ -1898,7 +1939,7 @@ void BinPostSetupPoints_simd16(
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@ -2040,8 +2081,7 @@ void SIMDAPI BinPoints_simd16(
uint32_t workerId,
simd16vector prim[3],
uint32_t primMask,
simd16scalari primID,
simd16scalari viewportIdx)
simd16scalari primID)
{
simd16vector& primVerts = prim[0];
@ -2049,6 +2089,21 @@ void SIMDAPI BinPoints_simd16(
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_RASTSTATE& rastState = state.rastState;
// Read back viewport index if required
simd16scalari viewportIdx = _simd16_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simd16vector vpiAttrib[1];
pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
// OOB indices => forced to zero.
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai)
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd16_and_si(vClearMask, vpai);
}
if (!feState.vpTransformDisable)
{
// perspective divide
@ -2059,7 +2114,7 @@ void SIMDAPI BinPoints_simd16(
primVerts.z = _simd16_mul_ps(primVerts.z, vRecipW0);
// viewport transform to screen coords
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
}
@ -2165,7 +2220,7 @@ void BinPostSetupLines(
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@ -2370,7 +2425,7 @@ void BinPostSetupLines_simd16(
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
@ -2533,8 +2588,7 @@ void BinLines(
uint32_t workerId,
simdvector prim[],
uint32_t primMask,
simdscalari primID,
simdscalari viewportIdx)
simdscalari primID)
{
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@ -2542,6 +2596,20 @@ void BinLines(
simdscalar vRecipW[2] = { _simd_set1_ps(1.0f), _simd_set1_ps(1.0f) };
simdscalari viewportIdx = _simd_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simdvector vpiAttrib[2];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
// OOB indices => forced to zero.
simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd_and_si(vClearMask, vpai);
}
if (!feState.vpTransformDisable)
{
// perspective divide
@ -2558,7 +2626,7 @@ void BinLines(
prim[1].v[2] = _simd_mul_ps(prim[1].v[2], vRecipW[1]);
// viewport transform to screen coords
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
}
@ -2594,8 +2662,7 @@ void SIMDAPI BinLines_simd16(
uint32_t workerId,
simd16vector prim[3],
uint32_t primMask,
simd16scalari primID,
simd16scalari viewportIdx)
simd16scalari primID)
{
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@ -2603,6 +2670,20 @@ void SIMDAPI BinLines_simd16(
simd16scalar vRecipW[2] = { _simd16_set1_ps(1.0f), _simd16_set1_ps(1.0f) };
simd16scalari viewportIdx = _simd16_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simd16vector vpiAttrib[2];
pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
// OOB indices => forced to zero.
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd16_and_si(vClearMask, vpai);
}
if (!feState.vpTransformDisable)
{
// perspective divide
@ -2619,7 +2700,7 @@ void SIMDAPI BinLines_simd16(
prim[1].v[2] = _simd16_mul_ps(prim[1].v[2], vRecipW[1]);
// viewport transform to screen coords
if (state.gsState.emitsViewportArrayIndex)
if (state.backendState.readViewportArrayIndex)
{
viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
}

View file

@ -160,35 +160,35 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
return i;
}
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<3> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipTriangles, 1);
}
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
Clipper<2> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipLines, 1);
}
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
Clipper<1> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipPoints, 1);
}
#if USE_SIMD16_FRONTEND
void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
@ -198,12 +198,12 @@ void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t work
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipTriangles, 1);
}
void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
@ -213,12 +213,12 @@ void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipLines, 1);
}
void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
@ -228,7 +228,7 @@ void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipPoints, 1);
}

View file

@ -459,7 +459,7 @@ public:
#endif
// clip SIMD primitives
void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId)
{
// input/output vertex store for clipper
simdvertex vertices[7]; // maximum 7 verts generated per triangle
@ -559,7 +559,6 @@ public:
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
const simdscalari vOffsets = _mm256_set_epi32(
0 * sizeof(simdvertex), // unused lane
@ -697,7 +696,7 @@ public:
}
clipPa.useAlternateOffset = false;
pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
}
#else
simdvector attrib[NumVertsPerPrim];
@ -705,7 +704,7 @@ public:
if (assemble)
{
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
}
#endif
} while (clipPa.NextPrim());
@ -717,7 +716,7 @@ public:
}
#if USE_SIMD16_FRONTEND
void ClipSimd(const simd16scalar& vPrimMask, const simd16scalar& vClipMask, PA_STATE& pa, const simd16scalari& vPrimId, const simd16scalari& vViewportIdx)
void ClipSimd(const simd16scalar& vPrimMask, const simd16scalar& vClipMask, PA_STATE& pa, const simd16scalari& vPrimId)
{
// input/output vertex store for clipper
simd16vertex vertices[7]; // maximum 7 verts generated per triangle
@ -817,7 +816,6 @@ public:
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
const simdscalari vOffsets = _simd_set_epi32(
0 * sizeof(simd16vertex), // unused lane
@ -928,7 +926,7 @@ public:
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff };
clipPa.useAlternateOffset = false;
pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd16_set1_epi32(pPrimitiveId[inputPrim]), _simd16_set1_epi32(pViewportIdx[inputPrim]));
pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd16_set1_epi32(pPrimitiveId[inputPrim]));
}
} while (clipPa.NextPrim());
@ -945,7 +943,7 @@ public:
#endif
// execute the clipper stage
void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId)
{
SWR_ASSERT(this->pDC != nullptr);
SWR_CONTEXT* pContext = this->pDC->pContext;
@ -973,6 +971,20 @@ public:
// update clipper invocations pipeline stat
uint32_t numInvoc = _mm_popcnt_u32(primMask);
UPDATE_STAT_FE(CInvocations, numInvoc);
// Read back viewport index if required
simdscalari viewportIdx = _simd_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simdvector vpiAttrib[NumVertsPerPrim];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB indices => forced to zero.
simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd_and_si(vClearMask, vpai);
}
ComputeClipCodes(prim, viewportIdx);
@ -1001,7 +1013,7 @@ public:
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
@ -1010,12 +1022,12 @@ public:
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
}
}
#if USE_SIMD16_FRONTEND
void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari primId)
{
SWR_ASSERT(pa.pDC != nullptr);
SWR_CONTEXT* pContext = pa.pDC->pContext;
@ -1043,6 +1055,19 @@ public:
uint32_t numInvoc = _mm_popcnt_u32(primMask);
UPDATE_STAT_FE(CInvocations, numInvoc);
// Read back viewport index if required
simd16scalari viewportIdx = _simd16_set1_epi32(0);
if (state.backendState.readViewportArrayIndex)
{
simd16vector vpiAttrib[NumVertsPerPrim];
pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
// OOB indices => forced to zero.
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd16_and_si(vClearMask, vpai);
}
ComputeClipCodes(prim, viewportIdx);
// cull prims with NAN coords
@ -1070,7 +1095,7 @@ public:
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
ClipSimd(vMask16(primMask), vMask16(clipMask), pa, primId, viewportIdx);
ClipSimd(vMask16(primMask), vMask16(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
@ -1079,7 +1104,7 @@ public:
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
}
}
@ -1854,12 +1879,12 @@ private:
// pipeline stage functions
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
#if USE_SIMD16_FRONTEND
void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
#endif

View file

@ -214,12 +214,12 @@ struct PA_STATE;
// function signature for pipeline stages that execute after primitive assembly
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
uint32_t primMask, simdscalari primID);
#if ENABLE_AVX512_SIMD16
// function signature for pipeline stages that execute after primitive assembly
typedef void(SIMDAPI *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
uint32_t primMask, simd16scalari primID);
#endif
OSALIGNLINE(struct) API_STATE

View file

@ -950,48 +950,11 @@ static void GeometryShaderStage(
#if USE_SIMD16_FRONTEND
simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
// use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
simd16scalari vViewPortIdx;
if (state.gsState.emitsViewportArrayIndex)
{
simd16vector vpiAttrib[3];
gsPa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
// OOB indices => forced to zero.
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
vViewPortIdx = _simd16_and_si(vClearMask, vpai);
}
else
{
vViewPortIdx = _simd16_set1_epi32(0);
}
gsPa.useAlternateOffset = false;
pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
#else
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
// use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
simdscalari vViewPortIdx;
if (state.gsState.emitsViewportArrayIndex)
{
simdvector vpiAttrib[3];
gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB indices => forced to zero.
simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
vViewPortIdx = _simd_and_si(vClearMask, vpai);
}
else
{
vViewPortIdx = _simd_set1_epi32(0);
}
pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
#endif
}
}
@ -1340,10 +1303,10 @@ static void TessellationStages(
SWR_ASSERT(pfnClipFunc);
#if USE_SIMD16_FRONTEND
tessPa.useAlternateOffset = false;
pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_set1_epi32(0));
pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID);
#else
pfnClipFunc(pDC, tessPa, workerId, prim,
GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
#endif
}
}
@ -1702,7 +1665,7 @@ void ProcessDraw(
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
pa.useAlternateOffset = false;
pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_setzero_si());
pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
}
}
}
@ -1864,7 +1827,7 @@ void ProcessDraw(
SWR_ASSERT(pDC->pState->pfnProcessPrims);
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), _simd_set1_epi32(0));
GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
}
}
}

View file

@ -388,10 +388,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
#endif
struct PA_STATE_BASE; // forward decl
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
#if USE_SIMD16_FRONTEND
void SIMDAPI BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
void SIMDAPI BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
void SIMDAPI BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID);
void SIMDAPI BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID);
#endif

View file

@ -710,9 +710,6 @@ struct SWR_GS_STATE
// instance count
uint32_t instanceCount;
// geometry shader emits ViewportArrayIndex
bool emitsViewportArrayIndex;
// if true, geometry shader emits a single stream, with separate cut buffer.
// if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
// to map vertices to streams
@ -1049,6 +1046,7 @@ struct SWR_BACKEND_STATE
SWR_ATTRIB_SWIZZLE swizzleMap[32];
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
};

View file

@ -547,8 +547,6 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
pGS->emitsViewportArrayIndex = info->writes_viewport_index;
// XXX: single stream for now...
pGS->isSingleStream = true;
pGS->singleStreamID = 0;

View file

@ -1755,12 +1755,12 @@ swr_update_derived(struct pipe_context *pipe,
(ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
if (ctx->gs)
backendState.readRenderTargetArrayIndex =
ctx->gs->info.base.writes_layer;
else
backendState.readRenderTargetArrayIndex =
ctx->vs->info.base.writes_layer;
struct tgsi_shader_info *pLastFE =
ctx->gs ?
&ctx->gs->info.base :
&ctx->vs->info.base;
backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
SwrSetBackendState(ctx->swrContext, &backendState);