swr/rasterizer: cleanups for tessellation

This commit introduces small fixes in preparation for tessellation
support.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Jan Zielinski 2019-07-24 12:10:27 +02:00
parent c5c05979f7
commit ad9aff5528
2 changed files with 56 additions and 28 deletions

View file

@ -583,8 +583,9 @@ static void StreamOut(
{
if (state.soBuffer[i].pWriteOffset)
{
bool nullTileAccessed = false;
void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed);
bool nullTileAccessed = false;
void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(
GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed);
*((uint32_t*)pWriteOffset) = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
}
@ -786,21 +787,20 @@ void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t
{
auto attribGatherX = SIMD_T::mask_i32gather_ps(
SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask);
auto attribGatherY = SIMD_T::mask_i32gather_ps(
SIMD_T::setzero_ps(),
(const float*)(pSrcBase + sizeof(float)),
vGatherOffsets,
vMask);
auto attribGatherZ = SIMD_T::mask_i32gather_ps(
SIMD_T::setzero_ps(),
(const float*)(pSrcBase + sizeof(float) * 2),
vGatherOffsets,
vMask);
auto attribGatherW = SIMD_T::mask_i32gather_ps(
SIMD_T::setzero_ps(),
(const float*)(pSrcBase + sizeof(float) * 3),
vGatherOffsets,
vMask);
auto attribGatherY = SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(),
(const float*)(pSrcBase + sizeof(float)),
vGatherOffsets,
vMask);
auto attribGatherZ =
SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(),
(const float*)(pSrcBase + sizeof(float) * 2),
vGatherOffsets,
vMask);
auto attribGatherW =
SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(),
(const float*)(pSrcBase + sizeof(float) * 3),
vGatherOffsets,
vMask);
SIMD_T::maskstore_ps((float*)pDstBase, viMask, attribGatherX);
SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>)), viMask, attribGatherY);
@ -1235,10 +1235,12 @@ static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC,
// Scratch state for the tessellation stages; TessellationStages reaches these
// members through gt_pTessellationThreadData.
// NOTE(review): this listing is a diff rendering with its +/- markers stripped, so
// pre-change and post-change members may appear side by side -- confirm against the real file.
struct TessellationThreadLocalData
{
// Hull shader context; TessellationStages fills in PrimitiveID, outputSize, vert[] and pCPout.
SWR_HS_CONTEXT hsContext;
// Fixed-size patch array; one visible line assigns it to hsContext.pCPout.
// NOTE(review): presumably the older storage that pHSOutput supersedes -- confirm.
ScalarPatch patchData[KNOB_SIMD_WIDTH];
// NOTE(review): presumably the tessellator context and its size; their use is not visible here.
void* pTxCtx;
size_t tsCtxSize;
// Hull shader output buffer. It is freed and re-allocated (64-byte aligned) whenever
// KNOB_SIMD_WIDTH * tsState.hsAllocationSize exceeds hsOutputAllocSize, then exposed
// to the shader as hsContext.pCPout.
uint8_t* pHSOutput;
// Size in bytes of the current pHSOutput allocation.
size_t hsOutputAllocSize;
// Domain shader output buffer; handed to the domain shader as dsContext.pOutputData.
simdscalar* pDSOutput;
// NOTE(review): presumably the allocated size of pDSOutput -- its allocation is not visible here.
size_t dsOutputAllocSize;
};
@ -1340,9 +1342,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
}
#endif
SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
hsContext.pCPout = gt_pTessellationThreadData->patchData;
hsContext.PrimitiveID = primID;
SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
hsContext.PrimitiveID = primID;
hsContext.outputSize = tsState.hsAllocationSize;
uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
// Max storage for one attribute for an entire simdprimitive
@ -1351,17 +1353,29 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
// assemble all attributes for the input primitives
for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot)
{
uint32_t attribSlot = tsState.vertexAttribOffset + slot;
uint32_t attribSlot = tsState.srcVertexAttribOffset + slot;
pa.Assemble(attribSlot, simdattrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
hsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = simdattrib[i];
hsContext.vert[i].attrib[tsState.vertexAttribOffset + slot] = simdattrib[i];
}
}
// Allocate HS output storage
uint32_t requiredAllocSize = KNOB_SIMD_WIDTH * tsState.hsAllocationSize;
if (requiredAllocSize > gt_pTessellationThreadData->hsOutputAllocSize)
{
AlignedFree(gt_pTessellationThreadData->pHSOutput);
gt_pTessellationThreadData->pHSOutput = (uint8_t*)AlignedMalloc(requiredAllocSize, 64);
gt_pTessellationThreadData->hsOutputAllocSize = requiredAllocSize;
}
hsContext.pCPout = (ScalarPatch*)gt_pTessellationThreadData->pHSOutput;
#if defined(_DEBUG)
memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH);
//memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH);
#endif
#if USE_SIMD16_FRONTEND
@ -1383,10 +1397,15 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
for (uint32_t p = 0; p < numPrims; ++p)
{
ScalarPatch* pCPout = (ScalarPatch*)(gt_pTessellationThreadData->pHSOutput + tsState.hsAllocationSize * p);
SWR_TESSELLATION_FACTORS tessFactors;
tessFactors = hsContext.pCPout[p].tessFactors;
// Run Tessellator
SWR_TS_TESSELLATED_DATA tsData = {0};
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FETessellation, pDC->drawId);
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
TSTessellate(tsCtx, tessFactors, tsData);
AR_EVENT(TessPrimCount(1));
RDTSC_END(pDC->pContext->pBucketMgr, FETessellation, 0);
@ -1423,7 +1442,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
// Run Domain Shader
SWR_DS_CONTEXT dsContext;
dsContext.PrimitiveID = pPrimId[p];
dsContext.pCpIn = &hsContext.pCPout[p];
dsContext.pCpIn = pCPout;
dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU;
dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV;
dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput;

View file

@ -169,8 +169,8 @@ enum SWR_INNER_TESSFACTOR_ID
enum SWR_OUTER_TESSFACTOR_ID
{
SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL,
SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY,
SWR_QUAD_U_EQ1_TRI_W,
SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY,
SWR_QUAD_V_EQ0_TRI_W,
SWR_QUAD_V_EQ1,
SWR_NUM_OUTER_TESS_FACTORS,
@ -281,8 +281,11 @@ struct SWR_TESSELLATION_FACTORS
{
float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
float pad[2];
};
SWR_STATIC_ASSERT(sizeof(SWR_TESSELLATION_FACTORS) == 32);
#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
struct ScalarPatch
{
@ -300,6 +303,7 @@ struct SWR_HS_CONTEXT
simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
simdscalari mask; // IN: Active mask for shader
uint32_t outputSize; // IN: Size of HS output (per lane)
ScalarPatch* pCPout; // OUT: Output control point patch SIMD-sized-array of SCALAR patches
SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
@ -818,11 +822,16 @@ struct SWR_TS_STATE
uint32_t numHsInputAttribs;
uint32_t numHsOutputAttribs;
uint32_t hsAllocationSize; // Size of HS output in bytes, per lane
uint32_t numDsOutputAttribs;
uint32_t dsAllocationSize;
uint32_t dsOutVtxAttribOffset;
// Offset to the start of the attributes of the input vertices, in simdvector units
uint32_t srcVertexAttribOffset;
// Offset to the start of the attributes expected by the hull shader
uint32_t vertexAttribOffset;
};