swr: [rasterizer core] TemplateArgUnroller

Switch boolean template arguments to typename template arguments of type
std::integral_constant<bool, VALUE>.

This allows the template argument unroller to easily be extended to enums.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2016-04-04 20:00:13 -06:00
parent 46a448d161
commit 27cc5924ea
5 changed files with 101 additions and 109 deletions

View file

@ -41,6 +41,7 @@
#include "core/threads.h"
#include "core/tilemgr.h"
#include "core/clip.h"
#include "core/utils.h"
#include "common/simdintrin.h"
#include "common/os.h"
@ -1029,42 +1030,6 @@ uint32_t MaxVertsPerDraw(
return vertsPerDraw;
}
//////////////////////////////////////////////////////////////////////////
/// @brief Turns a list of runtime booleans into the matching ProcessDraw
///        instantiation. Each GetFunc call consumes one runtime flag and
///        appends it to the compile-time pack ArgsB.
/// @tparam ArgsB - boolean values already fixed by the outer recursion levels
template <bool... ArgsB>
struct FEDrawChooser
{
    /// Terminal overload: the final flag completes the template argument
    /// list, so the fully specified ProcessDraw can be returned.
    static PFN_FE_WORK_FUNC GetFunc(bool bArg)
    {
        if (!bArg)
        {
            return ProcessDraw<ArgsB..., false>;
        }
        return ProcessDraw<ArgsB..., true>;
    }

    /// Recursive overload: fixes the leading flag as a template argument and
    /// forwards the remaining runtime flags to the next chooser level.
    template <typename... TArgsT>
    static PFN_FE_WORK_FUNC GetFunc(bool bArg, TArgsT... remainingArgs)
    {
        return bArg
            ? FEDrawChooser<ArgsB..., true>::GetFunc(remainingArgs...)
            : FEDrawChooser<ArgsB..., false>::GetFunc(remainingArgs...);
    }
};
//////////////////////////////////////////////////////////////////////////
/// @brief Selects the templated Draw front-end function that matches the
///        given runtime draw flags.
/// @return the front-end work function chosen by FEDrawChooser
INLINE
static PFN_FE_WORK_FUNC GetFEDrawFunc(bool IsIndexed, bool HasTessellation, bool HasGeometryShader, bool HasStreamOut, bool RasterizerEnabled)
{
    // Flags are handed over in the order FEDrawChooser appends them to the
    // ProcessDraw template argument list.
    PFN_FE_WORK_FUNC pfnDraw = FEDrawChooser<>::GetFunc(
        IsIndexed,
        HasTessellation,
        HasGeometryShader,
        HasStreamOut,
        RasterizerEnabled);
    return pfnDraw;
}
//////////////////////////////////////////////////////////////////////////
/// @brief DrawInstanced
@ -1119,7 +1084,7 @@ void DrawInstanced(
InitDraw(pDC, isSplitDraw);
pDC->FeWork.type = DRAW;
pDC->FeWork.pfnWork = GetFEDrawFunc(
pDC->FeWork.pfnWork = GetProcessDrawFunc(
false, // IsIndexed
pState->tsState.tsEnable,
pState->gsState.gsEnable,
@ -1252,7 +1217,7 @@ void DrawIndexedInstance(
InitDraw(pDC, isSplitDraw);
pDC->FeWork.type = DRAW;
pDC->FeWork.pfnWork = GetFEDrawFunc(
pDC->FeWork.pfnWork = GetProcessDrawFunc(
true, // IsIndexed
pState->tsState.tsEnable,
pState->gsState.gsEnable,

View file

@ -675,8 +675,8 @@ THREAD SWR_GS_CONTEXT tlsGsContext;
/// @param pa - The primitive assembly object.
/// @param pGsOut - output stream for GS
template <
bool HasStreamOutT,
bool HasRastT>
typename HasStreamOutT,
typename HasRastT>
static void GeometryShaderStage(
DRAW_CONTEXT *pDC,
uint32_t workerId,
@ -759,7 +759,7 @@ static void GeometryShaderStage(
// set up new binner and state for the GS output topology
PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
if (HasRastT)
if (HasRastT::value)
{
switch (pState->outputTopology)
{
@ -819,7 +819,7 @@ static void GeometryShaderStage(
else
{
// early exit if this stream is not enabled for streamout
if (HasStreamOutT && !state.soState.streamEnable[stream])
if (HasStreamOutT::value && !state.soState.streamEnable[stream])
{
continue;
}
@ -842,12 +842,12 @@ static void GeometryShaderStage(
{
totalPrimsGenerated += gsPa.NumPrims();
if (HasStreamOutT)
if (HasStreamOutT::value)
{
StreamOut(pDC, gsPa, workerId, pSoPrimData, stream);
}
if (HasRastT && state.soState.streamToRasterizer == stream)
if (HasRastT::value && state.soState.streamToRasterizer == stream)
{
simdscalari vPrimId;
// pull primitiveID from the GS output if available
@ -957,9 +957,9 @@ static void AllocateTessellationData(SWR_CONTEXT* pContext)
/// @param pa - The primitive assembly object.
/// @param pGsOut - output stream for GS
template <
bool HasGeometryShaderT,
bool HasStreamOutT,
bool HasRastT>
typename HasGeometryShaderT,
typename HasStreamOutT,
typename HasRastT>
static void TessellationStages(
DRAW_CONTEXT *pDC,
uint32_t workerId,
@ -995,7 +995,7 @@ static void TessellationStages(
SWR_ASSERT(tsCtx);
PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
if (HasRastT)
if (HasRastT::value)
{
switch (tsState.postDSTopology)
{
@ -1107,7 +1107,7 @@ static void TessellationStages(
while (tessPa.HasWork())
{
if (HasGeometryShaderT)
if (HasGeometryShaderT::value)
{
GeometryShaderStage<HasStreamOutT, HasRastT>(
pDC, workerId, tessPa, pGsOut, pCutBuffer, pCutStreamBuffer, pSoPrimData,
@ -1115,12 +1115,12 @@ static void TessellationStages(
}
else
{
if (HasStreamOutT)
if (HasStreamOutT::value)
{
StreamOut(pDC, tessPa, workerId, pSoPrimData, 0);
}
if (HasRastT)
if (HasRastT::value)
{
simdvector prim[3]; // Only deal with triangles, lines, or points
RDTSC_START(FEPAAssemble);
@ -1149,7 +1149,7 @@ static void TessellationStages(
/// @brief FE handler for SwrDraw.
/// @tparam IsIndexedT - Is indexed drawing enabled
/// @tparam HasTessellationT - Is tessellation enabled
/// @tparam HasGeometryShaderT - Is the geometry shader stage enabled
/// @tparam HasGeometryShaderT - Is the geometry shader stage enabled
/// @tparam HasStreamOutT - Is stream-out enabled
/// @tparam HasRastT - Is rasterization enabled
/// @param pContext - pointer to SWR context.
@ -1157,11 +1157,11 @@ static void TessellationStages(
/// @param workerId - thread's worker id.
/// @param pUserData - Pointer to DRAW_WORK
template <
bool IsIndexedT,
bool HasTessellationT,
bool HasGeometryShaderT,
bool HasStreamOutT,
bool HasRastT>
typename IsIndexedT,
typename HasTessellationT,
typename HasGeometryShaderT,
typename HasStreamOutT,
typename HasRastT>
void ProcessDraw(
SWR_CONTEXT *pContext,
DRAW_CONTEXT *pDC,
@ -1188,7 +1188,7 @@ void ProcessDraw(
uint32_t endVertex = work.numVerts;
const int32_t* pLastRequestedIndex = nullptr;
if (IsIndexedT)
if (IsIndexedT::value)
{
switch (work.type)
{
@ -1223,7 +1223,7 @@ void ProcessDraw(
vsContext.pVin = &vin;
if (IsIndexedT)
if (IsIndexedT::value)
{
fetchInfo.BaseVertex = work.baseVertex;
@ -1247,12 +1247,12 @@ void ProcessDraw(
void* pGsOut = nullptr;
void* pCutBuffer = nullptr;
void* pStreamCutBuffer = nullptr;
if (HasGeometryShaderT)
if (HasGeometryShaderT::value)
{
AllocateGsBuffers(pDC, state, &pGsOut, &pCutBuffer, &pStreamCutBuffer);
}
if (HasTessellationT)
if (HasTessellationT::value)
{
SWR_ASSERT(state.tsState.tsEnable == true);
SWR_ASSERT(state.pfnHsFunc != nullptr);
@ -1269,7 +1269,7 @@ void ProcessDraw(
// allocate space for streamout input prim data
uint32_t* pSoPrimData = nullptr;
if (HasStreamOutT)
if (HasStreamOutT::value)
{
pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16);
@ -1291,7 +1291,7 @@ void ProcessDraw(
simdscalari vIndex;
uint32_t i = 0;
if (IsIndexedT)
if (IsIndexedT::value)
{
fetchInfo.pIndices = work.pIB;
}
@ -1309,7 +1309,7 @@ void ProcessDraw(
// PaGetNextVsOutput currently has the side effect of updating some PA state machine state.
// So we need to keep this outside of (i < endVertex) check.
simdmask* pvCutIndices = nullptr;
if (IsIndexedT)
if (IsIndexedT::value)
{
pvCutIndices = &pa.GetNextVsIndices();
}
@ -1332,7 +1332,7 @@ void ProcessDraw(
vsContext.mask = GenerateMask(endVertex - i);
// forward cut mask to the PA
if (IsIndexedT)
if (IsIndexedT::value)
{
*pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask));
}
@ -1372,12 +1372,12 @@ void ProcessDraw(
{
UPDATE_STAT(IaPrimitives, pa.NumPrims());
if (HasTessellationT)
if (HasTessellationT::value)
{
TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID));
}
else if (HasGeometryShaderT)
else if (HasGeometryShaderT::value)
{
GeometryShaderStage<HasStreamOutT, HasRastT>(
pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID));
@ -1385,12 +1385,12 @@ void ProcessDraw(
else
{
// If streamout is enabled then stream vertices out to memory.
if (HasStreamOutT)
if (HasStreamOutT::value)
{
StreamOut(pDC, pa, workerId, pSoPrimData, 0);
}
if (HasRastT)
if (HasRastT::value)
{
SWR_ASSERT(pDC->pState->pfnProcessPrims);
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
@ -1403,7 +1403,7 @@ void ProcessDraw(
} while (pa.NextPrim());
i += KNOB_SIMD_WIDTH;
if (IsIndexedT)
if (IsIndexedT::value)
{
fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
}
@ -1417,39 +1417,29 @@ void ProcessDraw(
RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId);
}
// Explicit instantiation of ProcessDraw for all 32 combinations of its five
// boolean template parameters, listed in binary counting order. The template
// arguments are, in order: IsIndexedT, HasTessellationT, HasGeometryShaderT,
// HasStreamOutT, HasRastT.
// NOTE(review): presumably needed so that callers which only see the
// ProcessDraw declaration can link against every variant - confirm.
template void ProcessDraw<false, false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, false, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, false, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, false, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, false, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, false, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, false, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<false, true, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, false, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
template void ProcessDraw<true, true, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
//////////////////////////////////////////////////////////////////////////
/// @brief Terminator policy for TemplateArgUnroller: once every runtime flag
///        has been converted to a type, hands back the ProcessDraw
///        instantiation for that complete argument list.
struct FEDrawChooser
{
    /// Function pointer type returned to the unroller's caller.
    using FuncType = PFN_FE_WORK_FUNC;

    /// @tparam ArgsB - fully resolved ProcessDraw template arguments
    /// @return pointer to ProcessDraw<ArgsB...>
    template <typename... ArgsB>
    static FuncType GetFunc()
    {
        FuncType pfnDraw = ProcessDraw<ArgsB...>;
        return pfnDraw;
    }
};
//////////////////////////////////////////////////////////////////////////
/// @brief Selects the ProcessDraw instantiation that matches the given
///        runtime draw flags.
/// @param IsIndexed - Is indexed drawing enabled
/// @param HasTessellation - Is tessellation enabled
/// @param HasGeometryShader - Is the geometry shader stage enabled
/// @param HasStreamOut - Is stream-out enabled
/// @param HasRasterization - Is rasterization enabled
/// @return front-end work function for this flag combination
PFN_FE_WORK_FUNC GetProcessDrawFunc(
    bool IsIndexed,
    bool HasTessellation,
    bool HasGeometryShader,
    bool HasStreamOut,
    bool HasRasterization)
{
    // The unroller appends one type per flag, in call order, so the flags
    // must be passed in ProcessDraw's template parameter order.
    typedef TemplateArgUnroller<FEDrawChooser> DrawFuncUnroller;

    PFN_FE_WORK_FUNC pfnDraw = DrawFuncUnroller::GetFunc(
        IsIndexed,
        HasTessellation,
        HasGeometryShader,
        HasStreamOut,
        HasRasterization);
    return pfnDraw;
}
//////////////////////////////////////////////////////////////////////////

View file

@ -28,6 +28,7 @@
******************************************************************************/
#pragma once
#include "context.h"
#include <type_traits>
INLINE
__m128i fpToFixedPoint(const __m128 vIn)
@ -309,9 +310,14 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
// Templated Draw front-end function. All combinations of template parameter values are available
template <bool IsIndexedT, bool HasTessellationT, bool HasGeometryShaderT, bool HasStreamOutT, bool HasRastT>
void ProcessDraw(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
// Returns the ProcessDraw front-end function for a draw configuration.
// Each argument is a runtime flag describing the draw; all combinations of
// the five flag values are available.
PFN_FE_WORK_FUNC GetProcessDrawFunc(
bool IsIndexed,
bool HasTessellation,
bool HasGeometryShader,
bool HasStreamOut,
bool HasRasterization);
void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);

View file

@ -1146,14 +1146,14 @@ private:
// Primitive Assembler factory class, responsible for creating and initializing the correct assembler
// based on state.
template <bool IsIndexedT>
template <typename IsIndexedT>
struct PA_FACTORY
{
PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts) : topo(in_topo)
{
#if KNOB_ENABLE_CUT_AWARE_PA == TRUE
const API_STATE& state = GetApiState(pDC);
if ((IsIndexedT && (
if ((IsIndexedT::value && (
topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST ||
topo == TOP_LINE_LIST || topo == TOP_LINE_STRIP ||
topo == TOP_TRIANGLE_LIST || topo == TOP_LINE_LIST_ADJ ||
@ -1162,7 +1162,7 @@ struct PA_FACTORY
// non-indexed draws with adjacency topologies must use cut-aware PA until we add support
// for them in the optimized PA
(!IsIndexedT && (
(!IsIndexedT::value && (
topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ)))
{
memset(&indexStore, 0, sizeof(indexStore));

View file

@ -28,6 +28,7 @@
#pragma once
#include <string.h>
#include <type_traits>
#include "common/os.h"
#include "common/simdintrin.h"
#include "common/swr_assert.h"
@ -834,3 +835,33 @@ public:
return T(word & ELEMENT_MASK);
}
};
//////////////////////////////////////////////////////////////////////////
/// @brief Converts dynamic boolean function arguments into static template
///        arguments by nesting one conditional per argument. Each runtime
///        bool is replaced by std::true_type or std::false_type and appended
///        to ArgsB; when no runtime arguments remain, the accumulated type
///        list is handed to TermT.
/// @tparam TermT - terminator policy; must provide a FuncType typedef and a
///                 static member template GetFunc<Types...>()
/// @tparam ArgsB - types already resolved by the outer recursion levels
template <typename TermT, typename... ArgsB>
struct TemplateArgUnroller
{
    /// Last-argument terminator: resolves the final flag and asks TermT for
    /// the result matching the complete type list.
    static typename TermT::FuncType GetFunc(bool bArg)
    {
        if (!bArg)
        {
            return TermT::template GetFunc<ArgsB..., std::false_type>();
        }
        return TermT::template GetFunc<ArgsB..., std::true_type>();
    }

    /// Recursive step: resolves the leading flag to a type, then continues
    /// with the remaining runtime arguments one level deeper.
    template <typename... TArgsT>
    static typename TermT::FuncType GetFunc(bool bArg, TArgsT... remainingArgs)
    {
        using WhenTrue  = TemplateArgUnroller<TermT, ArgsB..., std::true_type>;
        using WhenFalse = TemplateArgUnroller<TermT, ArgsB..., std::false_type>;

        if (!bArg)
        {
            return WhenFalse::GetFunc(remainingArgs...);
        }
        return WhenTrue::GetFunc(remainingArgs...);
    }
};