mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-05 11:10:10 +01:00
swr/rast: SIMD16 FE - interleaved simdvertex output in GS
Eliminates conversion copies on GS output from simdvertex to simd16vertex.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
cbd33e71f7
commit
f64aea0959
2 changed files with 31 additions and 20 deletions
|
|
@ -717,10 +717,6 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num
|
|||
|
||||
THREAD SWR_GS_CONTEXT tlsGsContext;
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
THREAD simd16vertex tempVertex_simd16[128];
|
||||
|
||||
#endif
|
||||
template<typename SIMDVERTEX, uint32_t SIMD_WIDTH>
|
||||
struct GsBufferInfo
|
||||
{
|
||||
|
|
@ -819,7 +815,11 @@ static void GeometryShaderStage(
|
|||
tlsGsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i];
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
const GsBufferInfo<simd16vertex, KNOB_SIMD16_WIDTH> bufferInfo(state.gsState);
|
||||
#else
|
||||
const GsBufferInfo<simdvertex, KNOB_SIMD_WIDTH> bufferInfo(state.gsState);
|
||||
#endif
|
||||
|
||||
// record valid prims from the frontend to avoid over binning the newly generated
|
||||
// prims from the GS
|
||||
|
|
@ -923,19 +923,7 @@ static void GeometryShaderStage(
|
|||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
// TEMPORARY: GS outputs simdvertex, PA inputs simd16vertex, so convert simdvertex to simd16vertex
|
||||
|
||||
SWR_ASSERT(numEmittedVerts <= 256);
|
||||
|
||||
PackPairsOfSimdVertexIntoSimd16Vertex(
|
||||
tempVertex_simd16,
|
||||
reinterpret_cast<const simdvertex *>(pBase),
|
||||
numEmittedVerts,
|
||||
SWR_VTX_NUM_SLOTS);
|
||||
|
||||
#endif
|
||||
#if USE_SIMD16_FRONTEND
|
||||
PA_STATE_CUT gsPa(pDC, reinterpret_cast<uint8_t *>(tempVertex_simd16), numEmittedVerts, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
|
||||
PA_STATE_CUT gsPa(pDC, pBase, numEmittedVerts, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
|
||||
|
||||
#else
|
||||
PA_STATE_CUT gsPa(pDC, pBase, numEmittedVerts, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
|
||||
|
|
|
|||
|
|
@ -370,8 +370,13 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
|
|||
|
||||
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
const uint32_t simdVertexStride = sizeof(simdvertex) * 2;
|
||||
const uint32_t numSimdBatches = (pGS->maxNumVerts + (mVWidth * 2) - 1) / (mVWidth * 2);
|
||||
#else
|
||||
const uint32_t simdVertexStride = sizeof(simdvertex);
|
||||
const uint32_t numSimdBatches = (pGS->maxNumVerts + 7) / 8;
|
||||
const uint32_t numSimdBatches = (pGS->maxNumVerts + mVWidth - 1) / mVWidth;
|
||||
#endif
|
||||
const uint32_t inputPrimStride = numSimdBatches * simdVertexStride;
|
||||
|
||||
Value *pStream = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_pStream });
|
||||
|
|
@ -388,8 +393,14 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
|
|||
inputPrimStride * 6,
|
||||
inputPrimStride * 7 } );
|
||||
|
||||
Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), 3);
|
||||
Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), 7);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
const uint32_t simdShift = log2(mVWidth * 2);
|
||||
Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), (mVWidth * 2) - 1);
|
||||
#else
|
||||
const uint32_t simdShift = log2(mVWidth);
|
||||
Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), mVWidth - 1);
|
||||
#endif
|
||||
Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), simdShift);
|
||||
|
||||
for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
|
||||
uint32_t attribSlot = attrib;
|
||||
|
|
@ -400,10 +411,17 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
|
|||
else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER)
|
||||
attribSlot = VERTEX_RTAI_SLOT;
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
Value *vOffsetsAttrib =
|
||||
ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex) * 2)));
|
||||
vOffsetsAttrib =
|
||||
ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector) * 2)));
|
||||
#else
|
||||
Value *vOffsetsAttrib =
|
||||
ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex))));
|
||||
vOffsetsAttrib =
|
||||
ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector))));
|
||||
#endif
|
||||
vOffsetsAttrib =
|
||||
ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float))));
|
||||
|
||||
|
|
@ -416,8 +434,13 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
|
|||
|
||||
MASKED_SCATTER(vData, vPtrs, 32, vMask1);
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
vOffsetsAttrib =
|
||||
ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar) * 2));
|
||||
#else
|
||||
vOffsetsAttrib =
|
||||
ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar)));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue