swr: [rasterizer core/scripts] Autogen backend initialization function(s)

Autogenerate the functions that instantiate the different BackendPixelRate
templates. The functions are split into separate files once they reach a
user-defined threshold (currently 512 per file) to speed up compilation.

This change will enable the addition of more template flags in the pixel
back end.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-02-15 13:45:16 -08:00
parent 2c820d22cf
commit d2759c1eb3
7 changed files with 398 additions and 226 deletions

View file

@ -10,3 +10,4 @@ rasterizer/jitter/builder_x86.h
rasterizer/jitter/state_llvm.h
rasterizer/scripts/gen_knobs.cpp
rasterizer/scripts/gen_knobs.h
rasterizer/core/BackendPixelRate0.cpp

View file

@ -61,7 +61,8 @@ BUILT_SOURCES = \
rasterizer/archrast/gen_ar_event.h \
rasterizer/archrast/gen_ar_event.cpp \
rasterizer/archrast/gen_ar_eventhandler.h \
rasterizer/archrast/gen_ar_eventhandlerfile.h
rasterizer/archrast/gen_ar_eventhandlerfile.h \
rasterizer/core/BackendPixelRate0.cpp
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
@ -156,6 +157,21 @@ rasterizer/archrast/gen_ar_eventhandlerfile.h: rasterizer/scripts/gen_archrast.p
--output rasterizer/archrast/gen_ar_eventhandlerfile.h \
--gen_eventhandlerfile_h
# 5 SWR_MULTISAMPLE_TYPE_COUNT
# 2 SWR_MSAA_SAMPLE_PATTERN_COUNT
# 3 SWR_INPUT_COVERAGE_COUNT
# 2 centroid
# 2 forcedSampleCount
# 2 canEarlyZ
rasterizer/core/BackendPixelRate0.cpp: rasterizer/scripts/gen_backends.py rasterizer/scripts/templates/backend_template.cpp
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/scripts/gen_backends.py \
--outdir rasterizer/core \
--dim 5 2 3 2 2 2 \
--split 0 \
--cpp
COMMON_LIBADD = \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/mesa/libmesagallium.la \
@ -250,6 +266,7 @@ EXTRA_DIST = \
rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
rasterizer/jitter/scripts/gen_llvm_types.py \
rasterizer/scripts/gen_archrast.py \
rasterizer/scripts/gen_backends.py \
rasterizer/scripts/gen_knobs.py \
rasterizer/scripts/knob_defs.py \
rasterizer/scripts/mako/ast.py \
@ -273,4 +290,5 @@ EXTRA_DIST = \
rasterizer/scripts/templates/ar_event_h.template \
rasterizer/scripts/templates/ar_event_cpp.template \
rasterizer/scripts/templates/ar_eventhandler_h.template \
rasterizer/scripts/templates/ar_eventhandlerfile_h.template
rasterizer/scripts/templates/ar_eventhandlerfile_h.template \
rasterizer/scripts/templates/backend_template.cpp

View file

@ -132,12 +132,25 @@ env.CodeGenerate(
command = python_cmd + ' $SCRIPT --proto $SOURCE --output $TARGET --gen_eventhandlerfile_h'
)
# Generate rasterizer/core/BackendPixelRate0.cpp.
# The --dim values are the gBackendPixelRateTable array dimensions:
#   5 SWR_MULTISAMPLE_TYPE_COUNT
#   2 SWR_MSAA_SAMPLE_PATTERN_COUNT
#   3 SWR_INPUT_COVERAGE_COUNT
#   2 centroid
#   2 forcedSampleCount
#   2 canEarlyZ
# --split 0 disables splitting, so the single file named in 'target' is the
# only one produced.
env.CodeGenerate(
    target = 'rasterizer/core/BackendPixelRate0.cpp',
    script = swrroot + 'rasterizer/scripts/gen_backends.py',
    # gen_backends.py defines --outdir; it has no --output option, so passing
    # --output makes argparse abort the generation step.
    command = python_cmd + ' $SCRIPT --outdir rasterizer/core --dim 5 2 3 2 2 2 --split 0 --cpp'
)
# Auto-generated .cpp files (that need to generate object files)
built_sources = [
'rasterizer/scripts/gen_knobs.cpp',
'rasterizer/jitter/builder_gen.cpp',
'rasterizer/jitter/builder_x86.cpp',
'rasterizer/archrast/gen_ar_event.cpp',
'rasterizer/core/BackendPixelRate0.cpp',
]
source = built_sources

View file

@ -30,7 +30,6 @@
#include <smmintrin.h>
#include "backend.h"
#include "depthstencil.h"
#include "tilemgr.h"
#include "memory/tilingtraits.h"
#include "core/multisample.h"
@ -862,203 +861,6 @@ Endtile:
AR_END(BESampleRateBackend, 0);
}
// Pixel-rate backend: walks one tile in SIMD-tile steps, invokes the pixel
// shader once per SIMD tile on the active lanes, and then runs the output
// merger once per sample so the shader result is broadcast to the passing
// pixels of every sample.
//
// T is the traits type. This function reads T::InputCoverage,
// T::bForcedSampleCount, T::bCanEarlyZ, T::bIsStandardPattern and
// T::MultisampleT from it.
//
// pDC           - draw context; source of the API state, private state and drawId
// workerId      - forwarded to the PixelRateZTestLoop constructor
// x, y          - origin of the KNOB_TILE_X_DIM x KNOB_TILE_Y_DIM area walked below
// work          - triangle descriptor; its coverage masks are shifted down as each
//                 SIMD tile is consumed, so the caller's object is modified
// renderBuffers - passed to SetupRenderBuffers to obtain the color/depth/stencil pointers
template<typename T>
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
// NOTE(review): pContext is not referenced directly below; presumably it is
// consumed by the AR_* / UPDATE_STAT_BE macros - confirm before removing.
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(BEPixelRateBackend, pDC->drawId);
AR_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
SetupPixelShaderContext<T>(&psContext, work);
AR_END(BESetup, 0);
// functor used for both the early and the late depth test further down
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.rastState.clipDistanceMask);
// per-lane Y positions for the first SIMD row; stepped by dy at the end of each row
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
// per-lane X positions restart at the left edge for every row; stepped by dx per SIMD tile
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
#if USE_8x2_TILE_BACKEND
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
#endif
simdscalar activeLanes;
// nothing covered in this SIMD tile: jump straight to the end-of-tile bookkeeping
// (MASK is defined outside this function)
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
activeLanes = vMask(work.anyCoveredSamples & MASK);
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
// inner-conservative coverage reads the dedicated inner mask, otherwise the first coverage mask
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
if(T::bForcedSampleCount)
{
// candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
activeLanes = _simd_and_ps(activeLanes, vSampleMask);
}
// Early-Z?
if(T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
}
// if we have no covered samples that passed depth at this point, go to next tile
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
if(state.psState.usesSourceDepth)
{
AR_BEGIN(BEBarycentric, pDC->drawId);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
AR_END(BEBarycentric, 0);
}
// pixels that are currently active
psContext.activeMask = _simd_castps_si(activeLanes);
psContext.oMask = T::MultisampleT::FullSampleMask();
// execute pixel shader
AR_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
AR_END(BEPixelShader, 0);
// update active lanes to remove any discarded or oMask'd pixels
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
// late-Z
if(!T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
}
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
// output merger
// loop over all samples, broadcasting the results of the PS to all passing pixels
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++)
{
AR_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
simdscalar coverageMask, depthMask;
if(T::bForcedSampleCount)
{
// no depth test ran in the forced-sample-count case, so the surviving lanes serve as both masks
coverageMask = depthMask = activeLanes;
}
else
{
coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
if(!_simd_movemask_ps(depthMask))
{
// stencil should already have been written in early/lateZ tests
AR_END(BEOutputMerger, 0);
continue;
}
}
// broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND
OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
#else
OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
#endif
// depth/stencil are written here unless the pixel shader state forces early Z
// or the forced-sample-count variant is in use
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
{
uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
AR_END(BEOutputMerger, 0);
}
// Reached for every SIMD tile, including all early-outs above: consume this
// tile's bits from the coverage masks and advance the buffer pointers.
Endtile:
AR_BEGIN(BEEndTile, pDC->drawId);
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
work.anyCoveredSamples >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
#if USE_8x2_TILE_BACKEND
// NOTE(review): in this path only the color pointers advance, and only on the
// alternate offset; pDepthBuffer/pStencilBuffer are not advanced here - confirm
// this matches the 8x2 tile layout and how PixelRateZTestLoop tracks them.
if (useAlternateOffset)
{
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
{
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
for(uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
{
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
#endif
AR_END(BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
}
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
AR_END(BEPixelRateBackend, 0);
}
// optimized backend flow with NULL PS
template<uint32_t sampleCountT>
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
@ -1302,31 +1104,6 @@ void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COU
}
}
// Fills every slot of the pixel-rate backend table. Each slot is indexed by
// [sample count][sample pattern][input coverage][centroid][forced sample count][can early Z]
// and receives the backend that BEChooser selects for that exact combination.
void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_MSAA_SAMPLE_PATTERN_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2][2])
{
    for (uint32_t numSamples = SWR_MULTISAMPLE_1X; numSamples < SWR_MULTISAMPLE_TYPE_COUNT; ++numSamples)
    {
        for (uint32_t pattern = SWR_MSAA_CENTER_PATTERN; pattern < SWR_MSAA_SAMPLE_PATTERN_COUNT; ++pattern)
        {
            for (uint32_t coverage = 0; coverage < SWR_INPUT_COVERAGE_COUNT; ++coverage)
            {
                for (uint32_t centroid = 0; centroid < 2; ++centroid)
                {
                    for (uint32_t forced = 0; forced < 2; ++forced)
                    {
                        for (uint32_t earlyZ = 0; earlyZ < 2; ++earlyZ)
                        {
                            // the 0/1 loop counters double as the boolean template selectors
                            PFN_BACKEND_FUNC &slot = table[numSamples][pattern][coverage][centroid][forced][earlyZ];
                            slot = BEChooser<>::GetFunc(static_cast<SWR_MULTISAMPLE_COUNT>(numSamples),
                                                        static_cast<SWR_MSAA_SAMPLE_PATTERN>(pattern),
                                                        static_cast<SWR_INPUT_COVERAGE>(coverage),
                                                        (centroid != 0), (forced != 0), (earlyZ != 0),
                                                        SWR_BACKEND_MSAA_PIXEL_RATE);
                        }
                    }
                }
            }
        }
    }
}
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
{
for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
@ -1346,10 +1123,11 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_C
}
}
void InitBackendPixelRate0();
void InitBackendFuncTables()
{
InitBackendSingleFuncTable(gBackendSingleSample);
InitBackendPixelFuncTable(gBackendPixelRateTable);
InitBackendPixelRate0();
InitBackendSampleFuncTable(gBackendSampleRateTable);
gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;

View file

@ -31,6 +31,7 @@
#include "common/os.h"
#include "core/context.h"
#include "core/multisample.h"
#include "depthstencil.h"
#include "rdtsc_core.h"
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
@ -835,6 +836,204 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
}
#endif
// Pixel-rate backend: walks one tile in SIMD-tile steps, invokes the pixel
// shader once per SIMD tile on the active lanes, and then runs the output
// merger once per sample so the shader result is broadcast to the passing
// pixels of every sample.
//
// T is the traits type. This function reads T::InputCoverage,
// T::bForcedSampleCount, T::bCanEarlyZ, T::bIsStandardPattern and
// T::MultisampleT from it.
//
// pDC           - draw context; source of the API state, private state and drawId
// workerId      - forwarded to the PixelRateZTestLoop constructor
// x, y          - origin of the KNOB_TILE_X_DIM x KNOB_TILE_Y_DIM area walked below
// work          - triangle descriptor; its coverage masks are shifted down as each
//                 SIMD tile is consumed, so the caller's object is modified
// renderBuffers - passed to SetupRenderBuffers to obtain the color/depth/stencil pointers
template<typename T>
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
// NOTE(review): pContext is not referenced directly below; presumably it is
// consumed by the AR_* / UPDATE_STAT_BE macros - confirm before removing.
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(BEPixelRateBackend, pDC->drawId);
AR_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
SetupPixelShaderContext<T>(&psContext, work);
AR_END(BESetup, 0);
// functor used for both the early and the late depth test further down
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.rastState.clipDistanceMask);
// per-lane Y positions for the first SIMD row; stepped by dy at the end of each row
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
// per-lane X positions restart at the left edge for every row; stepped by dx per SIMD tile
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
#if USE_8x2_TILE_BACKEND
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
#endif
simdscalar activeLanes;
// nothing covered in this SIMD tile: jump straight to the end-of-tile bookkeeping
// (MASK is defined outside this function)
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
activeLanes = vMask(work.anyCoveredSamples & MASK);
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
// inner-conservative coverage reads the dedicated inner mask, otherwise the first coverage mask
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
if(T::bForcedSampleCount)
{
// candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
activeLanes = _simd_and_ps(activeLanes, vSampleMask);
}
// Early-Z?
if(T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
}
// if we have no covered samples that passed depth at this point, go to next tile
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
if(state.psState.usesSourceDepth)
{
AR_BEGIN(BEBarycentric, pDC->drawId);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
AR_END(BEBarycentric, 0);
}
// pixels that are currently active
psContext.activeMask = _simd_castps_si(activeLanes);
psContext.oMask = T::MultisampleT::FullSampleMask();
// execute pixel shader
AR_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
AR_END(BEPixelShader, 0);
// update active lanes to remove any discarded or oMask'd pixels
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
// late-Z
if(!T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
}
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
// output merger
// loop over all samples, broadcasting the results of the PS to all passing pixels
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++)
{
AR_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
simdscalar coverageMask, depthMask;
if(T::bForcedSampleCount)
{
// no depth test ran in the forced-sample-count case, so the surviving lanes serve as both masks
coverageMask = depthMask = activeLanes;
}
else
{
coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
if(!_simd_movemask_ps(depthMask))
{
// stencil should already have been written in early/lateZ tests
AR_END(BEOutputMerger, 0);
continue;
}
}
// broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND
OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
#else
OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
#endif
// depth/stencil are written here unless the pixel shader state forces early Z
// or the forced-sample-count variant is in use
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
{
uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
AR_END(BEOutputMerger, 0);
}
// Reached for every SIMD tile, including all early-outs above: consume this
// tile's bits from the coverage masks and advance the buffer pointers.
Endtile:
AR_BEGIN(BEEndTile, pDC->drawId);
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
work.anyCoveredSamples >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
#if USE_8x2_TILE_BACKEND
// NOTE(review): in this path only the color pointers advance, and only on the
// alternate offset; pDepthBuffer/pStencilBuffer are not advanced here - confirm
// this matches the 8x2 tile layout and how PixelRateZTestLoop tracks them.
if (useAlternateOffset)
{
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
{
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
for(uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
{
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
#endif
AR_END(BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
}
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
AR_END(BEPixelRateBackend, 0);
}
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0>
struct SwrBackendTraits

View file

@ -0,0 +1,125 @@
# Copyright (C) 2017 Intel Corporation. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
# Python source
# Compatible with Python2.X and Python3.X
from __future__ import print_function
import itertools
import math
import argparse
import os
import sys
from mako.template import Template
from mako.exceptions import RichTraceback
def write_template_to_string(template_filename, **kwargs):
    """Render a Mako template and return the result as a single string.

    template_filename: path to the template; it is made absolute before being
        handed to mako's Template.
    kwargs: values made available to the template while rendering.

    Returns the rendered text with its lines joined by a newline character
    (no trailing newline).

    On a rendering error the Mako traceback is printed to stderr and the
    original exception is re-raised. Returning normally here would make the
    caller write the text "None" into the generated file and let the build
    carry on with a bogus source.
    """
    try:
        template = Template(filename=os.path.abspath(template_filename))
        # Split + join fixes line endings for whatever platform you are using
        return '\n'.join(template.render(**kwargs).splitlines())
    except Exception:
        traceback = RichTraceback()
        for (filename, lineno, function, line) in traceback.traceback:
            print("File %s, line %s, in %s" % (filename, lineno, function), file=sys.stderr)
            print(line, "\n", file=sys.stderr)
        print("%s: %s" % (str(traceback.error.__class__.__name__), traceback.error), file=sys.stderr)
        # Propagate the failure so the script exits non-zero.
        raise
def write_template_to_file(template_filename, output_filename, **kwargs):
    """Render a Mako template into output_filename.

    template_filename: template path, forwarded to write_template_to_string.
    output_filename: file to (over)write; missing parent directories are
        created first.
    kwargs: values forwarded to the template.

    Raises RuntimeError if rendering produced no text, and OSError/IOError if
    the directory or file cannot be created.
    """
    import errno

    # Render before touching the filesystem so a failing template never leaves
    # a freshly created or truncated output file behind.
    rendered = write_template_to_string(template_filename, **kwargs)
    if rendered is None:
        # Guard against a None result so the literal text "None" is never
        # written into a generated source file.
        raise RuntimeError("failed to render template '%s'" % template_filename)

    output_dirname = os.path.dirname(output_filename)
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    if output_dirname and not os.path.isdir(output_dirname):
        try:
            os.makedirs(output_dirname)
        except OSError as e:
            # A parallel build step may have created the directory meanwhile.
            if e.errno != errno.EEXIST or not os.path.isdir(output_dirname):
                raise

    with open(output_filename, "w") as outfile:
        print(rendered, file=outfile)
def main(args=sys.argv[1:]):
    """Generate the BackendPixelRate initialization sources.

    One line of the form
    ``gBackendPixelRateTable[i][j]... = BackendPixelRate<SwrBackendTraits<i,j,...>>;``
    is built for every index combination described by ``--dim``. The lines are
    split into chunks, and then:

    * ``--cpp`` renders each chunk through ``templates/backend_template.cpp``
      into ``<outdir>/BackendPixelRate<N>.cpp``;
    * ``--cmake`` renders ``templates/backend_template.cmake`` into
      ``<outdir>/gen_backends.cmake`` with the file count and name pattern.

    args: command-line arguments without the program name.
    Returns 0 on success; invalid arguments exit through argparse.
    """
    thisDir = os.path.dirname(os.path.realpath(__file__))

    # Pass the text as the description: the first positional parameter of
    # ArgumentParser is `prog`, which would put this sentence in the usage line.
    parser = argparse.ArgumentParser(
        description="Generate files and initialization functions for all permutuations of BackendPixelRate.")
    parser.add_argument('--dim', help="gBackendPixelRateTable array dimensions", nargs='+', type=int, required=True)
    # Each option takes exactly one value; a bare `--outdir` / `--split` is
    # rejected by argparse instead of reaching the code below as None.
    parser.add_argument('--outdir', help="output directory", type=str, default=thisDir)
    parser.add_argument('--split', help="how many lines of initialization per file [0=no split]", type=int, default=512)
    parser.add_argument('--cpp', help="Generate cpp file(s)", action='store_true', default=False)
    parser.add_argument('--cmake', help="Generate cmake file", action='store_true', default=False)
    args = parser.parse_args(args)

    # A zero extent yields no combinations at all, which would make the chunk
    # arithmetic below divide by zero (or use a zero range step).
    if any(extent < 1 for extent in args.dim):
        parser.error("--dim values must be positive")
    if args.split < 0:
        parser.error("--split must be >= 0")

    # generate all permutations possible for the template parameter inputs
    index_ranges = [range(extent) for extent in args.dim]
    output_list = []
    for combination in itertools.product(*index_ranges):
        # each template parameter is also an index into the multidimensional table
        indices = ''.join('[' + str(i) + ']' for i in combination)
        # the same values, comma separated, form the template instantiation
        traits = ','.join(map(str, combination))
        output_list.append('gBackendPixelRateTable' + indices +
                           ' = BackendPixelRate<SwrBackendTraits<' + traits + '>>;')

    # how many files should we split the global template initialization into?
    if args.split == 0:
        numFiles = 1
    else:
        numFiles = (len(output_list) + args.split - 1) // args.split
    # spread the lines evenly rather than filling every file up to --split
    linesPerFile = (len(output_list) + numFiles - 1) // numFiles
    chunkedList = [output_list[x:x + linesPerFile] for x in range(0, len(output_list), linesPerFile)]

    # Needed by both the --cpp and the --cmake branch, so it is defined up
    # front (defining it under --cpp only breaks `--cmake` used alone).
    baseCppName = os.path.join(args.outdir, 'BackendPixelRate%s.cpp')

    # generate .cpp files
    if args.cpp:
        templateCpp = os.path.join(thisDir, 'templates', 'backend_template.cpp')
        for fileNum in range(numFiles):
            filename = baseCppName % str(fileNum)
            print('Generating', filename)
            write_template_to_file(
                templateCpp,
                filename,
                fileNum=fileNum,
                funcList=chunkedList[fileNum])

    # generate gen_backends.cmake file
    if args.cmake:
        templateCmake = os.path.join(thisDir, 'templates', 'backend_template.cmake')
        cmakeFile = os.path.join(args.outdir, 'gen_backends.cmake')
        print('Generating', cmakeFile)
        write_template_to_file(
            templateCmake,
            cmakeFile,
            numFiles=numFiles,
            # the name pattern is written with forward slashes on every platform
            baseCppName=baseCppName.replace('\\', '/'))

    print("Generated %d template instantiations in %d files" % (len(output_list), numFiles))
    return 0
if __name__ == '__main__':
sys.exit(main())

View file

@ -0,0 +1,38 @@
/****************************************************************************
* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file BackendPixelRate${fileNum}.cpp
*
* @brief auto-generated file
*
* DO NOT EDIT
*
******************************************************************************/
#include "core/backend.h"
// Initializer emitted once per generated file. The generator script passes
// funcList as a list of complete gBackendPixelRateTable assignment
// statements, and each one is written out verbatim as a line of the body.
void InitBackendPixelRate${fileNum}()
{
%for func in funcList:
${func}
%endfor
}