mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 11:20:20 +01:00
swr/rast: Support dynamically sized vertex layout
Each shader stage state (VS, TS, GS, SO, BE/CLIP) now has a vertexAttribOffset that specifies the offset to the start of the general attribute section of the incoming vertices for that stage. It is up to the driver to set this up correctly based on the active stages. All the shader stages use this value instead of VERTEX_ATTRIB_START_SLOT to offset to the incoming attributes. Currently, only the vertex shader stage supports dynamic layout output. The other stages continue to expect the output to be in the fixed layout slots as before. GS support will be enabled next. Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com>
This commit is contained in:
parent
cae53b24d7
commit
f87ff64850
6 changed files with 49 additions and 21 deletions
|
|
@ -80,12 +80,12 @@ INLINE void ProcessAttributes(
|
|||
if (IsSwizzledT::value)
|
||||
{
|
||||
SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
|
||||
inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib;
|
||||
inputSlot = backendState.vertexAttribOffset + attribSwizzle.sourceAttrib;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
inputSlot = VERTEX_ATTRIB_START_SLOT + i;
|
||||
inputSlot = backendState.vertexAttribOffset + i;
|
||||
}
|
||||
|
||||
__m128 attrib[3]; // triangle attribs (always 4 wide)
|
||||
|
|
|
|||
|
|
@ -489,7 +489,7 @@ public:
|
|||
// Compute absolute attrib slot in vertex array
|
||||
uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
|
||||
maxSlot = std::max<int32_t>(maxSlot, mapSlot);
|
||||
uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
|
||||
uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot;
|
||||
|
||||
pa.Assemble(inputSlot, tmpVector);
|
||||
|
||||
|
|
@ -625,10 +625,10 @@ public:
|
|||
}
|
||||
|
||||
// transpose attribs
|
||||
pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_ATTRIB_START_SLOT]) + sizeof(float) * inputPrim;
|
||||
pBase = (uint8_t*)(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim;
|
||||
for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
|
||||
uint32_t attribSlot = backendState.vertexAttribOffset + attrib;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
#if USE_SIMD16_FRONTEND
|
||||
|
|
@ -746,7 +746,7 @@ public:
|
|||
// Compute absolute attrib slot in vertex array
|
||||
uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
|
||||
maxSlot = std::max<int32_t>(maxSlot, mapSlot);
|
||||
uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
|
||||
uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot;
|
||||
|
||||
pa.Assemble_simd16(inputSlot, tmpVector);
|
||||
|
||||
|
|
@ -877,10 +877,10 @@ public:
|
|||
}
|
||||
|
||||
// transpose attribs
|
||||
pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_ATTRIB_START_SLOT]) + sizeof(float) * inputPrim;
|
||||
pBase = (uint8_t*)(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim;
|
||||
for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
|
||||
uint32_t attribSlot = backendState.vertexAttribOffset + attrib;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, vMask, 1);
|
||||
|
|
@ -1230,6 +1230,8 @@ private:
|
|||
uint32_t numInAttribs, // number of attributes per vertex.
|
||||
float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4.
|
||||
{
|
||||
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
|
||||
|
||||
// compute interpolation factor
|
||||
simdscalar t;
|
||||
switch (ClippingPlane)
|
||||
|
|
@ -1263,7 +1265,7 @@ private:
|
|||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
|
||||
|
|
@ -1312,6 +1314,8 @@ private:
|
|||
uint32_t numInAttribs, // number of attributes per vertex.
|
||||
float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4.
|
||||
{
|
||||
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
|
||||
|
||||
// compute interpolation factor
|
||||
simd16scalar t;
|
||||
switch (ClippingPlane)
|
||||
|
|
@ -1345,7 +1349,7 @@ private:
|
|||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simd16scalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
|
||||
|
|
@ -1421,6 +1425,8 @@ private:
|
|||
template<SWR_CLIPCODES ClippingPlane>
|
||||
simdscalari ClipTriToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
|
||||
{
|
||||
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
|
||||
|
||||
simdscalari vCurIndex = _simd_setzero_si();
|
||||
simdscalari vOutIndex = _simd_setzero_si();
|
||||
simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts));
|
||||
|
|
@ -1461,7 +1467,7 @@ private:
|
|||
// store attribs
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
|
||||
|
|
@ -1515,6 +1521,8 @@ private:
|
|||
template<SWR_CLIPCODES ClippingPlane>
|
||||
simd16scalari ClipTriToPlane(const float* pInVerts, const simd16scalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
|
||||
{
|
||||
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
|
||||
|
||||
simd16scalari vCurIndex = _simd16_setzero_si();
|
||||
simd16scalari vOutIndex = _simd16_setzero_si();
|
||||
simd16scalar vActiveMask = _simd16_castsi_ps(_simd16_cmplt_epi32(vCurIndex, vNumInPts));
|
||||
|
|
@ -1555,7 +1563,7 @@ private:
|
|||
// store attribs
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
|
||||
|
|
@ -1609,6 +1617,8 @@ private:
|
|||
template<SWR_CLIPCODES ClippingPlane>
|
||||
simdscalari ClipLineToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
|
||||
{
|
||||
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
|
||||
|
||||
simdscalari vCurIndex = _simd_setzero_si();
|
||||
simdscalari vOutIndex = _simd_setzero_si();
|
||||
simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts));
|
||||
|
|
@ -1646,7 +1656,7 @@ private:
|
|||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
|
||||
|
|
@ -1679,7 +1689,7 @@ private:
|
|||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c);
|
||||
|
|
@ -1699,6 +1709,8 @@ private:
|
|||
template<SWR_CLIPCODES ClippingPlane>
|
||||
simd16scalari ClipLineToPlane(const float* pInVerts, const simd16scalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
|
||||
{
|
||||
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
|
||||
|
||||
simd16scalari vCurIndex = _simd16_setzero_si();
|
||||
simd16scalari vOutIndex = _simd16_setzero_si();
|
||||
simd16scalar vActiveMask = _simd16_castsi_ps(_simd16_cmplt_epi32(vCurIndex, vNumInPts));
|
||||
|
|
@ -1736,7 +1748,7 @@ private:
|
|||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
|
||||
|
|
@ -1769,7 +1781,7 @@ private:
|
|||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
uint32_t attribSlot = vertexAttribOffset + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c);
|
||||
|
|
|
|||
|
|
@ -528,7 +528,7 @@ static void StreamOut(
|
|||
while (_BitScanForward(&slot, soMask))
|
||||
{
|
||||
__m128 attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
|
||||
uint32_t paSlot = slot + VERTEX_ATTRIB_START_SLOT;
|
||||
uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
|
||||
pa.AssembleSingle(paSlot, primIndex, attrib);
|
||||
|
||||
// Attribute offset is relative offset from start of vertex.
|
||||
|
|
@ -792,12 +792,12 @@ static void GeometryShaderStage(
|
|||
// assemble all attributes for the input primitive
|
||||
for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot;
|
||||
uint32_t attribSlot = pState->vertexAttribOffset + slot;
|
||||
pa.Assemble(attribSlot, attrib);
|
||||
|
||||
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
|
||||
{
|
||||
tlsGsContext.vert[i].attrib[attribSlot] = attrib[i];
|
||||
tlsGsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = attrib[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1131,12 +1131,12 @@ static void TessellationStages(
|
|||
// assemble all attributes for the input primitives
|
||||
for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot;
|
||||
uint32_t attribSlot = tsState.vertexAttribOffset + slot;
|
||||
pa.Assemble(attribSlot, simdattrib);
|
||||
|
||||
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
|
||||
{
|
||||
hsContext.vert[i].attrib[attribSlot] = simdattrib[i];
|
||||
hsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = simdattrib[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -673,6 +673,9 @@ struct SWR_STREAMOUT_STATE
|
|||
// Number of attributes, including position, per vertex that are streamed out.
|
||||
// This should match number of bits in stream mask.
|
||||
uint32_t streamNumEntries[MAX_SO_STREAMS];
|
||||
|
||||
// Offset to the start of the attributes of the input vertices, in simdvector units
|
||||
uint32_t vertexAttribOffset[MAX_SO_STREAMS];
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -718,6 +721,9 @@ struct SWR_GS_STATE
|
|||
// when single stream is enabled, singleStreamID dictates which stream is being output.
|
||||
// field ignored if isSingleStream is false
|
||||
uint32_t singleStreamID;
|
||||
|
||||
// Offset to the start of the attributes of the input vertices, in simdvector units
|
||||
uint32_t vertexAttribOffset;
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -773,6 +779,9 @@ struct SWR_TS_STATE
|
|||
uint32_t numHsInputAttribs;
|
||||
uint32_t numHsOutputAttribs;
|
||||
uint32_t numDsOutputAttribs;
|
||||
|
||||
// Offset to the start of the attributes of the input vertices, in simdvector units
|
||||
uint32_t vertexAttribOffset;
|
||||
};
|
||||
|
||||
// output merger state
|
||||
|
|
@ -1047,6 +1056,9 @@ struct SWR_BACKEND_STATE
|
|||
|
||||
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
|
||||
bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
|
||||
|
||||
// Offset to the start of the attributes of the input vertices, in simdvector units
|
||||
uint32_t vertexAttribOffset;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -551,6 +551,8 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
|
|||
pGS->isSingleStream = true;
|
||||
pGS->singleStreamID = 0;
|
||||
|
||||
pGS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
|
||||
|
||||
struct swr_geometry_shader *gs = ctx->gs;
|
||||
|
||||
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
|
||||
|
|
|
|||
|
|
@ -351,6 +351,7 @@ swr_create_vs_state(struct pipe_context *pipe,
|
|||
for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
|
||||
swr_vs->soState.streamNumEntries[i] =
|
||||
_mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
|
||||
swr_vs->soState.vertexAttribOffset[i] = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1747,6 +1748,7 @@ swr_update_derived(struct pipe_context *pipe,
|
|||
&ctx->vs->info.base;
|
||||
backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
|
||||
backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
|
||||
backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
|
||||
|
||||
SwrSetBackendState(ctx->swrContext, &backendState);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue