mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-19 12:20:40 +02:00
swr/rasterizer: cleanups for tessellation
This commit introduces small fixes in preparation for tessellation support.
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
c5c05979f7
commit
ad9aff5528
2 changed files with 56 additions and 28 deletions
|
|
@ -583,8 +583,9 @@ static void StreamOut(
|
|||
{
|
||||
if (state.soBuffer[i].pWriteOffset)
|
||||
{
|
||||
bool nullTileAccessed = false;
|
||||
void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed);
|
||||
bool nullTileAccessed = false;
|
||||
void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(
|
||||
GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed);
|
||||
*((uint32_t*)pWriteOffset) = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
|
|
@ -786,21 +787,20 @@ void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t
|
|||
{
|
||||
auto attribGatherX = SIMD_T::mask_i32gather_ps(
|
||||
SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask);
|
||||
auto attribGatherY = SIMD_T::mask_i32gather_ps(
|
||||
SIMD_T::setzero_ps(),
|
||||
(const float*)(pSrcBase + sizeof(float)),
|
||||
vGatherOffsets,
|
||||
vMask);
|
||||
auto attribGatherZ = SIMD_T::mask_i32gather_ps(
|
||||
SIMD_T::setzero_ps(),
|
||||
(const float*)(pSrcBase + sizeof(float) * 2),
|
||||
vGatherOffsets,
|
||||
vMask);
|
||||
auto attribGatherW = SIMD_T::mask_i32gather_ps(
|
||||
SIMD_T::setzero_ps(),
|
||||
(const float*)(pSrcBase + sizeof(float) * 3),
|
||||
vGatherOffsets,
|
||||
vMask);
|
||||
auto attribGatherY = SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(),
|
||||
(const float*)(pSrcBase + sizeof(float)),
|
||||
vGatherOffsets,
|
||||
vMask);
|
||||
auto attribGatherZ =
|
||||
SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(),
|
||||
(const float*)(pSrcBase + sizeof(float) * 2),
|
||||
vGatherOffsets,
|
||||
vMask);
|
||||
auto attribGatherW =
|
||||
SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(),
|
||||
(const float*)(pSrcBase + sizeof(float) * 3),
|
||||
vGatherOffsets,
|
||||
vMask);
|
||||
|
||||
SIMD_T::maskstore_ps((float*)pDstBase, viMask, attribGatherX);
|
||||
SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>)), viMask, attribGatherY);
|
||||
|
|
@ -1235,10 +1235,12 @@ static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC,
|
|||
struct TessellationThreadLocalData
|
||||
{
|
||||
SWR_HS_CONTEXT hsContext;
|
||||
ScalarPatch patchData[KNOB_SIMD_WIDTH];
|
||||
void* pTxCtx;
|
||||
size_t tsCtxSize;
|
||||
|
||||
uint8_t* pHSOutput;
|
||||
size_t hsOutputAllocSize;
|
||||
|
||||
simdscalar* pDSOutput;
|
||||
size_t dsOutputAllocSize;
|
||||
};
|
||||
|
|
@ -1340,9 +1342,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
|
|||
}
|
||||
|
||||
#endif
|
||||
SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
|
||||
hsContext.pCPout = gt_pTessellationThreadData->patchData;
|
||||
hsContext.PrimitiveID = primID;
|
||||
SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
|
||||
hsContext.PrimitiveID = primID;
|
||||
hsContext.outputSize = tsState.hsAllocationSize;
|
||||
|
||||
uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
|
||||
// Max storage for one attribute for an entire simdprimitive
|
||||
|
|
@ -1351,17 +1353,29 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
|
|||
// assemble all attributes for the input primitives
|
||||
for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot)
|
||||
{
|
||||
uint32_t attribSlot = tsState.vertexAttribOffset + slot;
|
||||
uint32_t attribSlot = tsState.srcVertexAttribOffset + slot;
|
||||
pa.Assemble(attribSlot, simdattrib);
|
||||
|
||||
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
|
||||
{
|
||||
hsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = simdattrib[i];
|
||||
hsContext.vert[i].attrib[tsState.vertexAttribOffset + slot] = simdattrib[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate HS output storage
|
||||
uint32_t requiredAllocSize = KNOB_SIMD_WIDTH * tsState.hsAllocationSize;
|
||||
|
||||
if (requiredAllocSize > gt_pTessellationThreadData->hsOutputAllocSize)
|
||||
{
|
||||
AlignedFree(gt_pTessellationThreadData->pHSOutput);
|
||||
gt_pTessellationThreadData->pHSOutput = (uint8_t*)AlignedMalloc(requiredAllocSize, 64);
|
||||
gt_pTessellationThreadData->hsOutputAllocSize = requiredAllocSize;
|
||||
}
|
||||
|
||||
hsContext.pCPout = (ScalarPatch*)gt_pTessellationThreadData->pHSOutput;
|
||||
|
||||
#if defined(_DEBUG)
|
||||
memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH);
|
||||
//memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH);
|
||||
#endif
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
|
|
@ -1383,10 +1397,15 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
|
|||
|
||||
for (uint32_t p = 0; p < numPrims; ++p)
|
||||
{
|
||||
ScalarPatch* pCPout = (ScalarPatch*)(gt_pTessellationThreadData->pHSOutput + tsState.hsAllocationSize * p);
|
||||
|
||||
SWR_TESSELLATION_FACTORS tessFactors;
|
||||
tessFactors = hsContext.pCPout[p].tessFactors;
|
||||
|
||||
// Run Tessellator
|
||||
SWR_TS_TESSELLATED_DATA tsData = {0};
|
||||
RDTSC_BEGIN(pDC->pContext->pBucketMgr, FETessellation, pDC->drawId);
|
||||
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
|
||||
TSTessellate(tsCtx, tessFactors, tsData);
|
||||
AR_EVENT(TessPrimCount(1));
|
||||
RDTSC_END(pDC->pContext->pBucketMgr, FETessellation, 0);
|
||||
|
||||
|
|
@ -1423,7 +1442,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
|
|||
// Run Domain Shader
|
||||
SWR_DS_CONTEXT dsContext;
|
||||
dsContext.PrimitiveID = pPrimId[p];
|
||||
dsContext.pCpIn = &hsContext.pCPout[p];
|
||||
dsContext.pCpIn = pCPout;
|
||||
dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU;
|
||||
dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV;
|
||||
dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput;
|
||||
|
|
|
|||
|
|
@ -169,8 +169,8 @@ enum SWR_INNER_TESSFACTOR_ID
|
|||
enum SWR_OUTER_TESSFACTOR_ID
|
||||
{
|
||||
SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL,
|
||||
SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY,
|
||||
SWR_QUAD_U_EQ1_TRI_W,
|
||||
SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY,
|
||||
SWR_QUAD_V_EQ0_TRI_W,
|
||||
SWR_QUAD_V_EQ1,
|
||||
|
||||
SWR_NUM_OUTER_TESS_FACTORS,
|
||||
|
|
@ -281,8 +281,11 @@ struct SWR_TESSELLATION_FACTORS
|
|||
{
|
||||
float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
|
||||
float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
|
||||
float pad[2];
|
||||
};
|
||||
|
||||
SWR_STATIC_ASSERT(sizeof(SWR_TESSELLATION_FACTORS) == 32);
|
||||
|
||||
#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
|
||||
struct ScalarPatch
|
||||
{
|
||||
|
|
@ -300,6 +303,7 @@ struct SWR_HS_CONTEXT
|
|||
simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
|
||||
simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
|
||||
simdscalari mask; // IN: Active mask for shader
|
||||
uint32_t outputSize; // IN: Size of HS output (per lane)
|
||||
ScalarPatch* pCPout; // OUT: Output control point patch SIMD-sized-array of SCALAR patches
|
||||
SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
|
||||
};
|
||||
|
|
@ -818,11 +822,16 @@ struct SWR_TS_STATE
|
|||
|
||||
uint32_t numHsInputAttribs;
|
||||
uint32_t numHsOutputAttribs;
|
||||
uint32_t hsAllocationSize; // Size of HS output in bytes, per lane
|
||||
|
||||
uint32_t numDsOutputAttribs;
|
||||
uint32_t dsAllocationSize;
|
||||
uint32_t dsOutVtxAttribOffset;
|
||||
|
||||
// Offset to the start of the attributes of the input vertices, in simdvector units
|
||||
uint32_t srcVertexAttribOffset;
|
||||
|
||||
// Offset to the start of the attributes expected by the hull shader
|
||||
uint32_t vertexAttribOffset;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue