mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 06:30:10 +01:00
swr: [rasterizer core/scripts] Autogen backend initialization function(s)
Autogen functions that instantiate different BackendPixelRate templates. Functions get split into separate files after reaching a user defined threshold (currently 512 per file) to speed up compilation. This change will enable the addition of more template flags in the pixel back end. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
2c820d22cf
commit
d2759c1eb3
7 changed files with 398 additions and 226 deletions
1
src/gallium/drivers/swr/.gitignore
vendored
1
src/gallium/drivers/swr/.gitignore
vendored
|
|
@ -10,3 +10,4 @@ rasterizer/jitter/builder_x86.h
|
|||
rasterizer/jitter/state_llvm.h
|
||||
rasterizer/scripts/gen_knobs.cpp
|
||||
rasterizer/scripts/gen_knobs.h
|
||||
rasterizer/core/BackendPixelRate0.cpp
|
||||
|
|
|
|||
|
|
@ -61,7 +61,8 @@ BUILT_SOURCES = \
|
|||
rasterizer/archrast/gen_ar_event.h \
|
||||
rasterizer/archrast/gen_ar_event.cpp \
|
||||
rasterizer/archrast/gen_ar_eventhandler.h \
|
||||
rasterizer/archrast/gen_ar_eventhandlerfile.h
|
||||
rasterizer/archrast/gen_ar_eventhandlerfile.h \
|
||||
rasterizer/core/BackendPixelRate0.cpp
|
||||
|
||||
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
|
||||
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
|
||||
|
|
@ -156,6 +157,21 @@ rasterizer/archrast/gen_ar_eventhandlerfile.h: rasterizer/scripts/gen_archrast.p
|
|||
--output rasterizer/archrast/gen_ar_eventhandlerfile.h \
|
||||
--gen_eventhandlerfile_h
|
||||
|
||||
# 5 SWR_MULTISAMPLE_TYPE_COUNT
|
||||
# 2 SWR_MSAA_SAMPLE_PATTERN_COUNT
|
||||
# 3 SWR_INPUT_COVERAGE_COUNT
|
||||
# 2 centroid
|
||||
# 2 forcedSampleCount
|
||||
# 2 canEarlyZ
|
||||
rasterizer/core/BackendPixelRate0.cpp: rasterizer/scripts/gen_backends.py rasterizer/scripts/templates/backend_template.cpp
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) \
|
||||
$(srcdir)/rasterizer/scripts/gen_backends.py \
|
||||
--outdir rasterizer/core \
|
||||
--dim 5 2 3 2 2 2 \
|
||||
--split 0 \
|
||||
--cpp
|
||||
|
||||
COMMON_LIBADD = \
|
||||
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
|
||||
$(top_builddir)/src/mesa/libmesagallium.la \
|
||||
|
|
@ -250,6 +266,7 @@ EXTRA_DIST = \
|
|||
rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
rasterizer/jitter/scripts/gen_llvm_types.py \
|
||||
rasterizer/scripts/gen_archrast.py \
|
||||
rasterizer/scripts/gen_backends.py \
|
||||
rasterizer/scripts/gen_knobs.py \
|
||||
rasterizer/scripts/knob_defs.py \
|
||||
rasterizer/scripts/mako/ast.py \
|
||||
|
|
@ -273,4 +290,5 @@ EXTRA_DIST = \
|
|||
rasterizer/scripts/templates/ar_event_h.template \
|
||||
rasterizer/scripts/templates/ar_event_cpp.template \
|
||||
rasterizer/scripts/templates/ar_eventhandler_h.template \
|
||||
rasterizer/scripts/templates/ar_eventhandlerfile_h.template
|
||||
rasterizer/scripts/templates/ar_eventhandlerfile_h.template \
|
||||
rasterizer/scripts/templates/backend_template.cpp
|
||||
|
|
|
|||
|
|
@ -132,12 +132,25 @@ env.CodeGenerate(
|
|||
command = python_cmd + ' $SCRIPT --proto $SOURCE --output $TARGET --gen_eventhandlerfile_h'
|
||||
)
|
||||
|
||||
# 5 SWR_MULTISAMPLE_TYPE_COUNT
|
||||
# 2 SWR_MSAA_SAMPLE_PATTERN_COUNT
|
||||
# 3 SWR_INPUT_COVERAGE_COUNT
|
||||
# 2 centroid
|
||||
# 2 forcedSampleCount
|
||||
# 2 canEarlyZ
|
||||
# Generate the gBackendPixelRateTable initialisation source.  The --dim values
# are the table dimensions listed in the comment block above.
env.CodeGenerate(
    target = 'rasterizer/core/BackendPixelRate0.cpp',
    script = swrroot + 'rasterizer/scripts/gen_backends.py',
    # gen_backends.py names its output-directory option --outdir (the
    # automake rule passes the same flag); argparse rejects --output as an
    # unrecognized argument.
    command = python_cmd + ' $SCRIPT --outdir rasterizer/core --dim 5 2 3 2 2 2 --split 0 --cpp'
)
|
||||
|
||||
# Auto-generated .cpp files (that need to generate object files)
|
||||
built_sources = [
|
||||
'rasterizer/scripts/gen_knobs.cpp',
|
||||
'rasterizer/jitter/builder_gen.cpp',
|
||||
'rasterizer/jitter/builder_x86.cpp',
|
||||
'rasterizer/archrast/gen_ar_event.cpp',
|
||||
'rasterizer/core/BackendPixelRate0.cpp',
|
||||
]
|
||||
|
||||
source = built_sources
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@
|
|||
#include <smmintrin.h>
|
||||
|
||||
#include "backend.h"
|
||||
#include "depthstencil.h"
|
||||
#include "tilemgr.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
#include "core/multisample.h"
|
||||
|
|
@ -862,203 +861,6 @@ Endtile:
|
|||
|
||||
AR_END(BESampleRateBackend, 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BEPixelRateBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
SetupPixelShaderContext<T>(&psContext, work);
|
||||
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.rastState.clipDistanceMask);
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
|
||||
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
|
||||
|
||||
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
|
||||
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
|
||||
|
||||
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
|
||||
|
||||
#endif
|
||||
simdscalar activeLanes;
|
||||
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
|
||||
activeLanes = vMask(work.anyCoveredSamples & MASK);
|
||||
|
||||
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
|
||||
|
||||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
if(T::bForcedSampleCount)
|
||||
{
|
||||
// candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
|
||||
const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
|
||||
activeLanes = _simd_and_ps(activeLanes, vSampleMask);
|
||||
}
|
||||
|
||||
// Early-Z?
|
||||
if(T::bCanEarlyZ && !T::bForcedSampleCount)
|
||||
{
|
||||
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
|
||||
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
||||
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
||||
}
|
||||
|
||||
// if we have no covered samples that passed depth at this point, go to next tile
|
||||
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
|
||||
|
||||
if(state.psState.usesSourceDepth)
|
||||
{
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
AR_END(BEBarycentric, 0);
|
||||
}
|
||||
|
||||
// pixels that are currently active
|
||||
psContext.activeMask = _simd_castps_si(activeLanes);
|
||||
psContext.oMask = T::MultisampleT::FullSampleMask();
|
||||
|
||||
// execute pixel shader
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
// update active lanes to remove any discarded or oMask'd pixels
|
||||
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
|
||||
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
|
||||
|
||||
// late-Z
|
||||
if(!T::bCanEarlyZ && !T::bForcedSampleCount)
|
||||
{
|
||||
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
|
||||
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
||||
AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
||||
}
|
||||
|
||||
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
|
||||
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
|
||||
|
||||
// output merger
|
||||
// loop over all samples, broadcasting the results of the PS to all passing pixels
|
||||
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++)
|
||||
{
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
|
||||
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
|
||||
simdscalar coverageMask, depthMask;
|
||||
if(T::bForcedSampleCount)
|
||||
{
|
||||
coverageMask = depthMask = activeLanes;
|
||||
}
|
||||
else
|
||||
{
|
||||
coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
|
||||
depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
|
||||
if(!_simd_movemask_ps(depthMask))
|
||||
{
|
||||
// stencil should already have been written in early/lateZ tests
|
||||
AR_END(BEOutputMerger, 0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// broadcast the results of the PS to all passing pixels
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else
|
||||
OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
|
||||
#endif
|
||||
|
||||
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
|
||||
{
|
||||
uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
|
||||
uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
|
||||
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
|
||||
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
|
||||
}
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
Endtile:
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
|
||||
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
|
||||
{
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
work.anyCoveredSamples >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
if (useAlternateOffset)
|
||||
{
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for(uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
#endif
|
||||
|
||||
AR_END(BEEndTile, 0);
|
||||
|
||||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
|
||||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
|
||||
}
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
|
||||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
|
||||
}
|
||||
|
||||
AR_END(BEPixelRateBackend, 0);
|
||||
}
|
||||
// optimized backend flow with NULL PS
|
||||
template<uint32_t sampleCountT>
|
||||
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
|
|
@ -1302,31 +1104,6 @@ void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COU
|
|||
}
|
||||
}
|
||||
|
||||
void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_MSAA_SAMPLE_PATTERN_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2][2])
|
||||
{
|
||||
for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
|
||||
{
|
||||
for(uint32_t samplePattern = SWR_MSAA_CENTER_PATTERN; samplePattern < SWR_MSAA_SAMPLE_PATTERN_COUNT; samplePattern++)
|
||||
{
|
||||
for(uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
|
||||
{
|
||||
for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
|
||||
{
|
||||
for(uint32_t forcedSampleCount = 0; forcedSampleCount < 2; forcedSampleCount++)
|
||||
{
|
||||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[sampleCount][samplePattern][inputCoverage][isCentroid][forcedSampleCount][canEarlyZ] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
(isCentroid > 0), (forcedSampleCount > 0), (canEarlyZ > 0), SWR_BACKEND_MSAA_PIXEL_RATE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
|
||||
{
|
||||
for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
|
||||
|
|
@ -1346,10 +1123,11 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_C
|
|||
}
|
||||
}
|
||||
|
||||
void InitBackendPixelRate0();
|
||||
void InitBackendFuncTables()
|
||||
{
|
||||
InitBackendSingleFuncTable(gBackendSingleSample);
|
||||
InitBackendPixelFuncTable(gBackendPixelRateTable);
|
||||
InitBackendPixelRate0();
|
||||
InitBackendSampleFuncTable(gBackendSampleRateTable);
|
||||
|
||||
gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include "common/os.h"
|
||||
#include "core/context.h"
|
||||
#include "core/multisample.h"
|
||||
#include "depthstencil.h"
|
||||
#include "rdtsc_core.h"
|
||||
|
||||
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
|
||||
|
|
@ -835,6 +836,204 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
/// @brief Pixel-rate backend: walks the tile whose origin is (x, y) in
///        SIMD_TILE_X_DIM x SIMD_TILE_Y_DIM steps and, for every SIMD tile with
///        covered samples, invokes the pixel shader once and broadcasts its
///        result to the samples handled by the output merger.
///
/// Per SIMD tile the steps are, in order: coverage test against
/// work.anyCoveredSamples, optional input-coverage generation, barycentric and
/// centroid setup, sample-mask gating (forced sample count only), early depth
/// test (T::bCanEarlyZ), pixel shader invocation, late depth test
/// (!T::bCanEarlyZ), output merger plus depth/stencil write, and finally the
/// Endtile bookkeeping that is also reached by every early-out.
///
/// @tparam T  backend traits; this function reads T::InputCoverage,
///            T::bForcedSampleCount, T::bCanEarlyZ, T::bIsStandardPattern and
///            T::MultisampleT.
/// @param pDC            draw context; supplies the API state, the private
///                       state handed to the shader and the draw id used for
///                       the AR_BEGIN buckets.
/// @param workerId       forwarded to PixelRateZTestLoop.
/// @param x, y           tile origin; seeds psContext.vX / psContext.vY and
///                       bounds the two tile loops.
/// @param work           triangle descriptor. Modified in place: the coverage
///                       masks are shifted right once per SIMD tile.
/// @param renderBuffers  source of the color/depth/stencil pointers set up by
///                       SetupRenderBuffers.
template<typename T>
|
||||
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BEPixelRateBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
SetupPixelShaderContext<T>(&psContext, work);
|
||||
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.rastState.clipDistanceMask);
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
|
||||
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
|
||||
|
||||
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
|
||||
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
|
||||
|
||||
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
|
||||
|
||||
#endif
|
||||
simdscalar activeLanes;
|
||||
// no covered samples in this SIMD tile: skip all work but still run the Endtile bookkeeping
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
|
||||
activeLanes = vMask(work.anyCoveredSamples & MASK);
|
||||
|
||||
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
|
||||
|
||||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
if(T::bForcedSampleCount)
|
||||
{
|
||||
// candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
|
||||
const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
|
||||
activeLanes = _simd_and_ps(activeLanes, vSampleMask);
|
||||
}
|
||||
|
||||
// Early-Z?
|
||||
if(T::bCanEarlyZ && !T::bForcedSampleCount)
|
||||
{
|
||||
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
|
||||
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
||||
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
||||
}
|
||||
|
||||
// if we have no covered samples that passed depth at this point, go to next tile
|
||||
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
|
||||
|
||||
if(state.psState.usesSourceDepth)
|
||||
{
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
AR_END(BEBarycentric, 0);
|
||||
}
|
||||
|
||||
// pixels that are currently active
|
||||
psContext.activeMask = _simd_castps_si(activeLanes);
|
||||
psContext.oMask = T::MultisampleT::FullSampleMask();
|
||||
|
||||
// execute pixel shader
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
// update active lanes to remove any discarded or oMask'd pixels
|
||||
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
|
||||
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
|
||||
|
||||
// late-Z
|
||||
if(!T::bCanEarlyZ && !T::bForcedSampleCount)
|
||||
{
|
||||
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
|
||||
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
||||
AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
||||
}
|
||||
|
||||
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
|
||||
if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
|
||||
|
||||
// output merger
|
||||
// loop over all samples, broadcasting the results of the PS to all passing pixels
|
||||
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++)
|
||||
{
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
|
||||
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
|
||||
simdscalar coverageMask, depthMask;
|
||||
if(T::bForcedSampleCount)
|
||||
{
|
||||
coverageMask = depthMask = activeLanes;
|
||||
}
|
||||
else
|
||||
{
|
||||
coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
|
||||
depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
|
||||
if(!_simd_movemask_ps(depthMask))
|
||||
{
|
||||
// stencil should already have been written in early/lateZ tests
|
||||
AR_END(BEOutputMerger, 0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// broadcast the results of the PS to all passing pixels
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else
|
||||
OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
|
||||
#endif
|
||||
|
||||
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
|
||||
{
|
||||
uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
|
||||
uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
|
||||
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
|
||||
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
|
||||
}
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
// per-tile bookkeeping, reached by normal flow and by every early-out above:
// shift this SIMD tile's bits out of the coverage masks and step the buffer pointers.
// NOTE(review): in the USE_8x2_TILE_BACKEND build only the color pointers are stepped
// here (on alternate tiles); depth/stencil stepping is not visible in this function.
Endtile:
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
|
||||
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
|
||||
{
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
work.anyCoveredSamples >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
if (useAlternateOffset)
|
||||
{
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for(uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
#endif
|
||||
|
||||
AR_END(BEEndTile, 0);
|
||||
|
||||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
|
||||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
|
||||
}
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
|
||||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
|
||||
}
|
||||
|
||||
AR_END(BEPixelRateBackend, 0);
|
||||
}
|
||||
|
||||
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
|
||||
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0>
|
||||
struct SwrBackendTraits
|
||||
|
|
|
|||
125
src/gallium/drivers/swr/rasterizer/scripts/gen_backends.py
Normal file
125
src/gallium/drivers/swr/rasterizer/scripts/gen_backends.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
# Copyright (C) 2017 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
# Python source
|
||||
# Compatible with Python2.X and Python3.X
|
||||
|
||||
from __future__ import print_function
|
||||
import itertools
|
||||
import math
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from mako.template import Template
|
||||
from mako.exceptions import RichTraceback
|
||||
|
||||
def write_template_to_string(template_filename, **kwargs):
    """Render a Mako template file and return the rendered text.

    template_filename -- path of the Mako template (made absolute before use)
    **kwargs          -- variables handed to template.render()

    Returns the rendered text with every line ending normalised to '\\n'.

    If loading or rendering fails, Mako's rich traceback (which points at the
    offending template line) is printed and the original exception is then
    re-raised, so the caller never receives None and cannot write a bogus
    output file while the build carries on as if generation had succeeded.
    """
    try:
        template = Template(filename=os.path.abspath(template_filename))
        # Split + Join fixes line-endings for whatever platform you are using
        return '\n'.join(template.render(**kwargs).splitlines())
    except:
        traceback = RichTraceback()
        for (filename, lineno, function, line) in traceback.traceback:
            print("File %s, line %s, in %s" % (filename, lineno, function))
            print(line, "\n")
        print("%s: %s" % (str(traceback.error.__class__.__name__), traceback.error))
        # Propagate the failure instead of silently returning None.
        raise
|
||||
|
||||
def write_template_to_file(template_filename, output_filename, **kwargs):
    """Render a Mako template and write the result to output_filename.

    template_filename -- path of the Mako template
    output_filename   -- file to create or overwrite; missing parent
                         directories are created first
    **kwargs          -- variables handed to the template
    """
    output_dirname = os.path.dirname(output_filename)
    # dirname is '' for a bare filename, and os.makedirs('') raises.
    if output_dirname and not os.path.isdir(output_dirname):
        try:
            os.makedirs(output_dirname)
        except OSError:
            # Another generator running in parallel may have created the
            # directory between the check and the call; only give up if it
            # is still missing.
            if not os.path.isdir(output_dirname):
                raise
    # Render before opening the file so that a render that raises does not
    # leave a truncated output file behind.
    rendered = write_template_to_string(template_filename, **kwargs)
    with open(output_filename, "w") as outfile:
        print(rendered, file=outfile)
|
||||
|
||||
|
||||
def main(args=sys.argv[1:]):
    """Generate the gBackendPixelRateTable initialisation source file(s).

    args -- command-line arguments without the program name:
        --dim N [N ...]  table dimensions (required, each >= 1)
        --outdir DIR     output directory (default: this script's directory)
        --split N        max initialisation lines per file, 0 = single file
                         (default: 512)
        --cpp            write BackendPixelRate<N>.cpp from backend_template.cpp
        --cmake          write gen_backends.cmake from backend_template.cmake

    One line of C++ is produced for every index combination of the table,
    assigning the matching BackendPixelRate<SwrBackendTraits<...>>
    instantiation to that table entry.

    Returns 0 on success. Invalid arguments terminate through argparse
    (SystemExit with status 2).
    """
    thisDir = os.path.dirname(os.path.realpath(__file__))
    # The text is the program description; passed positionally it would be
    # taken as the program name shown in usage and error messages.
    parser = argparse.ArgumentParser(description="Generate files and initialization functions for all permutuations of BackendPixelRate.")
    parser.add_argument('--dim', help="gBackendPixelRateTable array dimensions", nargs='+', type=int, required=True)
    # const makes a bare "--outdir" / "--split" fall back to the default
    # instead of yielding None and crashing further down.
    parser.add_argument('--outdir', help="output directory", nargs='?', type=str, default=thisDir, const=thisDir)
    parser.add_argument('--split', help="how many lines of initialization per file [0=no split]", nargs='?', type=int, default=512, const=512)
    parser.add_argument('--cpp', help="Generate cpp file(s)", action='store_true', default=False)
    parser.add_argument('--cmake', help="Generate cmake file", action='store_true', default=False)

    args = parser.parse_args(args)

    # A zero or negative dimension gives an empty table, and a negative split
    # gives a nonsensical file count; both used to end in a range()/division
    # error or in silently generating nothing.
    if any(dim < 1 for dim in args.dim):
        parser.error("--dim values must be positive integers")
    if args.split < 0:
        parser.error("--split must be zero or a positive integer")

    # one line of C++ per combination of template parameter values
    output_list = []
    for combination in itertools.product(*[range(dim) for dim in args.dim]):
        # each value is both an index into the multidimensional table ...
        indices = ''.join('[' + str(i) + ']' for i in combination)
        # ... and an argument of the template instantiation
        traits = ','.join(map(str, combination))
        output_list.append('gBackendPixelRateTable' + indices +
                           " = BackendPixelRate<SwrBackendTraits<" + traits + '>>;')

    # how many files should we split the global template initialization into?
    if args.split == 0:
        numFiles = 1
    else:
        numFiles = (len(output_list) + args.split - 1) // args.split
    # spread the lines evenly over the files (rounding up)
    linesPerFile = (len(output_list) + numFiles - 1) // numFiles
    chunkedList = [output_list[x:x+linesPerFile] for x in range(0, len(output_list), linesPerFile)]

    baseCppName = os.path.join(args.outdir, 'BackendPixelRate%s.cpp')

    # generate .cpp files
    if args.cpp:
        templateCpp = os.path.join(thisDir, 'templates', 'backend_template.cpp')

        for fileNum in range(numFiles):
            filename = baseCppName % str(fileNum)
            print('Generating', filename)
            write_template_to_file(
                templateCpp,
                filename,
                fileNum=fileNum,
                funcList=chunkedList[fileNum])

    # generate gen_backends.cmake file
    if args.cmake:
        templateCmake = os.path.join(thisDir, 'templates', 'backend_template.cmake')
        cmakeFile = os.path.join(args.outdir, 'gen_backends.cmake')
        print('Generating', cmakeFile)
        write_template_to_file(
            templateCmake,
            cmakeFile,
            numFiles=numFiles,
            baseCppName=baseCppName.replace('\\','/'))

    print("Generated %d template instantiations in %d files" % (len(output_list), numFiles))

    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file BackendPixelRate${fileNum}.cpp
|
||||
*
|
||||
* @brief auto-generated file
|
||||
*
|
||||
* DO NOT EDIT
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "core/backend.h"
|
||||
|
||||
void InitBackendPixelRate${fileNum}()
|
||||
{
|
||||
%for func in funcList:
|
||||
${func}
|
||||
%endfor
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue