mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
swr: [rasterizer core] conservative rasterization frontend support
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
c867c22d85
commit
c7cd33b605
10 changed files with 325 additions and 63 deletions
|
|
@ -67,6 +67,7 @@ CORE_CXX_SOURCES := \
|
|||
rasterizer/core/blend.h \
|
||||
rasterizer/core/clip.cpp \
|
||||
rasterizer/core/clip.h \
|
||||
rasterizer/core/conservativeRast.h \
|
||||
rasterizer/core/context.h \
|
||||
rasterizer/core/depthstencil.h \
|
||||
rasterizer/core/fifo.hpp \
|
||||
|
|
|
|||
|
|
@ -780,10 +780,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
|
||||
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
|
||||
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
|
||||
|
||||
// currently only support 'normal' input coverage
|
||||
SWR_ASSERT(psState.inputCoverage == SWR_INPUT_COVERAGE_NORMAL ||
|
||||
psState.inputCoverage == SWR_INPUT_COVERAGE_NONE);
|
||||
const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE);
|
||||
|
||||
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
|
||||
|
||||
|
|
@ -795,20 +792,20 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
{
|
||||
// always need to generate I & J per sample for Z interpolation
|
||||
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
|
||||
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ];
|
||||
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][inputCoverage][centroid][forcedSampleCount][canEarlyZ];
|
||||
}
|
||||
else
|
||||
{
|
||||
// always need to generate I & J per pixel for Z interpolation
|
||||
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
|
||||
backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
|
||||
backendFuncs.pfnBackend = gBackendSingleSample[inputCoverage][centroid][canEarlyZ];
|
||||
}
|
||||
break;
|
||||
case SWR_SHADING_RATE_SAMPLE:
|
||||
SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
|
||||
// always need to generate I & J per sample for Z interpolation
|
||||
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
|
||||
backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
|
||||
backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][inputCoverage][centroid][canEarlyZ];
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid shading rate");
|
||||
|
|
@ -833,7 +830,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
break;
|
||||
default:
|
||||
pState->pfnProcessPrims = ClipTriangles;
|
||||
pfnBinner = BinTriangles;
|
||||
pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
|
||||
break;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -385,7 +385,7 @@ public:
|
|||
PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN;
|
||||
if (NumVertsPerPrim == 3)
|
||||
{
|
||||
pfnBinFunc = BinTriangles;
|
||||
pfnBinFunc = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0));
|
||||
clipTopology = TOP_TRIANGLE_FAN;
|
||||
|
||||
// so that the binner knows to bloat wide points later
|
||||
|
|
@ -519,7 +519,7 @@ public:
|
|||
pfnBinner = BinLines;
|
||||
break;
|
||||
default:
|
||||
pfnBinner = BinTriangles;
|
||||
pfnBinner = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0));
|
||||
break;
|
||||
};
|
||||
|
||||
|
|
|
|||
120
src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
Normal file
120
src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file conservativeRast.h
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
#include <type_traits>
|
||||
#include "common/simdintrin.h"
|
||||
|
||||
/// @brief Identifiers for the fixed point formats used by the front end.
///        The numeric values are spelled out explicitly; they are the same
///        values the enumerators receive implicitly.
enum FixedPointFmt
{
    FP_UNINIT = 0,  ///< no format; wrapped by the Fixed_Uninit tag type
    _16_8     = 1,  ///< 16.8 fixed point
    _16_9     = 2   ///< 16.9 fixed point
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convenience typedefs for supported Fixed Point precisions
|
||||
typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit;
|
||||
typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
|
||||
typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @struct FixedPointTraits
/// @brief  Constants used when converting between FP32 and fixed point.
///         The primary template has no members; the constants are supplied
///         by the per-format specializations.
/// @tparam FT: fixed precision type
template <typename FT>
struct FixedPointTraits
{
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Fixed_16_8 specialization of FixedPointTraits
|
||||
template<>
|
||||
struct FixedPointTraits<Fixed_16_8>
|
||||
{
|
||||
/// multiplier to go from FP32 to Fixed Point 16.8
|
||||
typedef std::integral_constant<uint32_t, 256> FixedPointScaleT;
|
||||
/// number of bits to shift to go from 16.8 fixed => int32
|
||||
typedef std::integral_constant<uint32_t, 8> FixedPointShiftT;
|
||||
typedef Fixed_16_8 TypeT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Fixed_16_9 specialization of FixedPointTraits
|
||||
template<>
|
||||
struct FixedPointTraits<Fixed_16_9>
|
||||
{
|
||||
/// multiplier to go from FP32 to Fixed Point 16.9
|
||||
typedef std::integral_constant<uint32_t, 512> FixedPointScaleT;
|
||||
/// number of bits to shift to go from 16.9 fixed => int32
|
||||
typedef std::integral_constant<uint32_t, 9> FixedPointShiftT;
|
||||
typedef Fixed_16_9 TypeT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Tag types selecting the rasterization mode: the false type stands
///        for standard rasterization, the true type for conservative.
using StandardRastT     = std::false_type;
using ConservativeRastT = std::true_type;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convenience typedefs for Input Coverage rasterization modes
|
||||
typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT;
|
||||
typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NORMAL> OuterConservativeCoverageT;
|
||||
typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE> InnerConservativeCoverageT;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @struct ConservativeRastFETraits
/// @brief  primary ConservativeRastFETraits template. Shouldn't be
///         instantiated; it has no members, the usable traits are in the
///         StandardRastT / ConservativeRastT specializations.
/// @tparam ConservativeT: type of conservative rasterization
template <typename ConservativeT>
struct ConservativeRastFETraits
{
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief StandardRast specialization of ConservativeRastTraits
|
||||
template <>
|
||||
struct ConservativeRastFETraits<StandardRastT>
|
||||
{
|
||||
typedef std::false_type IsConservativeT;
|
||||
typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT;
|
||||
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ConservativeRastT specialization of ConservativeRastTraits
|
||||
template <>
|
||||
struct ConservativeRastFETraits<ConservativeRastT>
|
||||
{
|
||||
typedef std::true_type IsConservativeT;
|
||||
typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;
|
||||
|
||||
/// Conservative bounding box needs to expand the area around each vertex by 1/512, which
|
||||
/// is the potential snapping error when going from FP-> 16.8 fixed
|
||||
typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT;
|
||||
typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;
|
||||
typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convenience typedefs for ConservativeRastFETraits
|
||||
typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
|
||||
typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
|
||||
|
|
@ -83,6 +83,8 @@ struct SWR_TRIANGLE_DESC
|
|||
float *pUserClipBuffer;
|
||||
|
||||
uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
|
||||
uint64_t conservativeCoverageMask;
|
||||
uint64_t innerConservativeCoverageMask;
|
||||
uint64_t anyCoveredSamples;
|
||||
|
||||
TRI_FLAGS triFlags;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#include "context.h"
|
||||
#include "rdtsc_core.h"
|
||||
#include "rasterizer.h"
|
||||
#include "conservativeRast.h"
|
||||
#include "utils.h"
|
||||
#include "threads.h"
|
||||
#include "pa.h"
|
||||
|
|
@ -1589,6 +1590,132 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask,
|
|||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Convert the X,Y coords of a triangle to the requested Fixed
|
||||
/// Point precision from FP32.
|
||||
template <typename PT = FixedPointTraits<Fixed_16_8>>
|
||||
INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn)
|
||||
{
|
||||
simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(PT::FixedPointScaleT::value));
|
||||
return _simd_cvtps_epi32(vFixed);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Helper function to set the X,Y coords of a triangle to the
|
||||
/// requested Fixed Point precision from FP32. If the RequestedT
|
||||
/// FixedPointTraits precision is the same as the CurrentT, no extra
|
||||
/// conversions will be done. If they are different, convert from FP32
|
||||
/// to the Requested precision and set vXi, vYi
|
||||
/// @tparam RequestedT: requested FixedPointTraits type
|
||||
/// @tparam CurrentT: FixedPointTraits type of the last
|
||||
template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>>
|
||||
struct FPToFixedPoint
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @param tri: simdvector[3] of FP triangle verts
|
||||
/// @param vXi: fixed point X coords of tri verts
|
||||
/// @param vYi: fixed point Y coords of tri verts
|
||||
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
|
||||
{
|
||||
vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x);
|
||||
vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y);
|
||||
vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x);
|
||||
vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y);
|
||||
vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x);
|
||||
vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y);
|
||||
};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief In the case where the RequestedT and CurrentT fixed point
|
||||
/// precisions are the same, do nothing.
|
||||
template<typename RequestedT>
|
||||
struct FPToFixedPoint<RequestedT, RequestedT>
|
||||
{
|
||||
INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Calculate bounding box for current triangle
|
||||
/// @tparam CT: ConservativeRastFETraits type
|
||||
/// @param vX: fixed point X position for triangle verts
|
||||
/// @param vY: fixed point Y position for triangle verts
|
||||
/// @param bbox: fixed point bbox
|
||||
/// *Note*: expects vX, vY to be in the correct precision for the type
|
||||
/// of rasterization. This avoids unnecessary FP->fixed conversions.
|
||||
template <typename CT>
|
||||
INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical
|
||||
template <>
|
||||
INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
|
||||
{
|
||||
// FE conservative rast traits
|
||||
typedef FEStandardRastT CT;
|
||||
|
||||
static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision");
|
||||
// Update vXi, vYi fixed point precision for BBox calculation if necessary
|
||||
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
|
||||
|
||||
simdscalari vMinX = vX[0];
|
||||
vMinX = _simd_min_epi32(vMinX, vX[1]);
|
||||
vMinX = _simd_min_epi32(vMinX, vX[2]);
|
||||
|
||||
simdscalari vMaxX = vX[0];
|
||||
vMaxX = _simd_max_epi32(vMaxX, vX[1]);
|
||||
vMaxX = _simd_max_epi32(vMaxX, vX[2]);
|
||||
|
||||
simdscalari vMinY = vY[0];
|
||||
vMinY = _simd_min_epi32(vMinY, vY[1]);
|
||||
vMinY = _simd_min_epi32(vMinY, vY[2]);
|
||||
|
||||
simdscalari vMaxY = vY[0];
|
||||
vMaxY = _simd_max_epi32(vMaxY, vY[1]);
|
||||
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
|
||||
|
||||
bbox.left = vMinX;
|
||||
bbox.right = vMaxX;
|
||||
bbox.top = vMinY;
|
||||
bbox.bottom = vMaxY;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief FEConservativeRastT specialization of calcBoundingBoxIntVertical
|
||||
/// Offsets BBox for conservative rast
|
||||
template <>
|
||||
INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
|
||||
{
|
||||
// FE conservative rast traits
|
||||
typedef FEConservativeRastT CT;
|
||||
|
||||
static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision");
|
||||
// Update vXi, vYi fixed point precision for BBox calculation if necessary
|
||||
FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
|
||||
|
||||
simdscalari vMinX = vX[0];
|
||||
vMinX = _simd_min_epi32(vMinX, vX[1]);
|
||||
vMinX = _simd_min_epi32(vMinX, vX[2]);
|
||||
|
||||
simdscalari vMaxX = vX[0];
|
||||
vMaxX = _simd_max_epi32(vMaxX, vX[1]);
|
||||
vMaxX = _simd_max_epi32(vMaxX, vX[2]);
|
||||
|
||||
simdscalari vMinY = vY[0];
|
||||
vMinY = _simd_min_epi32(vMinY, vY[1]);
|
||||
vMinY = _simd_min_epi32(vMinY, vY[2]);
|
||||
|
||||
simdscalari vMaxY = vY[0];
|
||||
vMaxY = _simd_max_epi32(vMaxY, vY[1]);
|
||||
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
|
||||
|
||||
/// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
|
||||
bbox.left = _simd_srli_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
bbox.right = _simd_srli_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
bbox.top = _simd_srli_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
bbox.bottom = _simd_srli_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Bin triangle primitives to macro tiles. Performs setup, clipping
|
||||
/// culling, viewport transform, etc.
|
||||
|
|
@ -1597,6 +1724,8 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask,
|
|||
/// @param workerId - thread's worker id. Every thread has a unique id.
|
||||
/// @param tri - Contains triangle position data for SIMDs worth of triangles.
|
||||
/// @param primID - Primitive ID for each triangle.
|
||||
/// @tparam CT - ConservativeRastFETraits
|
||||
template <typename CT>
|
||||
void BinTriangles(
|
||||
DRAW_CONTEXT *pDC,
|
||||
PA_STATE& pa,
|
||||
|
|
@ -1652,14 +1781,9 @@ void BinTriangles(
|
|||
tri[2].x = _simd_add_ps(tri[2].x, offset);
|
||||
tri[2].y = _simd_add_ps(tri[2].y, offset);
|
||||
|
||||
// convert to fixed point
|
||||
simdscalari vXi[3], vYi[3];
|
||||
vXi[0] = fpToFixedPointVertical(tri[0].x);
|
||||
vYi[0] = fpToFixedPointVertical(tri[0].y);
|
||||
vXi[1] = fpToFixedPointVertical(tri[1].x);
|
||||
vYi[1] = fpToFixedPointVertical(tri[1].y);
|
||||
vXi[2] = fpToFixedPointVertical(tri[2].x);
|
||||
vYi[2] = fpToFixedPointVertical(tri[2].y);
|
||||
// Set vXi, vYi to fixed point precision required for degenerate triangle check
|
||||
FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi);
|
||||
|
||||
// triangle setup
|
||||
simdscalari vAi[3], vBi[3];
|
||||
|
|
@ -1669,6 +1793,8 @@ void BinTriangles(
|
|||
simdscalari vDet[2];
|
||||
calcDeterminantIntVertical(vAi, vBi, vDet);
|
||||
|
||||
/// todo: handle degen tri's for Conservative Rast.
|
||||
|
||||
// cull zero area
|
||||
int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si())));
|
||||
int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si())));
|
||||
|
|
@ -1713,6 +1839,7 @@ void BinTriangles(
|
|||
RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
|
||||
}
|
||||
|
||||
/// Note: these variable initializations must stay above any 'goto endBinTriangles'
|
||||
// compute per tri backface
|
||||
uint32_t frontFaceMask = frontWindingTris;
|
||||
|
||||
|
|
@ -1726,14 +1853,13 @@ void BinTriangles(
|
|||
|
||||
// Calc bounding box of triangles
|
||||
simdBBox bbox;
|
||||
calcBoundingBoxIntVertical(vXi, vYi, bbox);
|
||||
calcBoundingBoxIntVertical<CT>(tri, vXi, vYi, bbox);
|
||||
|
||||
// determine if triangle falls between pixel centers and discard
|
||||
// only discard for non-MSAA case
|
||||
// only discard for non-MSAA case and when conservative rast is disabled
|
||||
// (left + 127) & ~255
|
||||
// (right + 128) & ~255
|
||||
|
||||
if(rastState.sampleCount == SWR_MULTISAMPLE_1X)
|
||||
if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value))
|
||||
{
|
||||
origTriMask = triMask;
|
||||
|
||||
|
|
@ -1891,7 +2017,22 @@ endBinTriangles:
|
|||
RDTSC_STOP(FEBinTriangles, 1, 0);
|
||||
}
|
||||
|
||||
struct FEBinTrianglesChooser
|
||||
{
|
||||
typedef PFN_PROCESS_PRIMS FuncType;
|
||||
|
||||
template <typename... ArgsB>
|
||||
static FuncType GetFunc()
|
||||
{
|
||||
return BinTriangles<ConservativeRastFETraits<ArgsB...>>;
|
||||
}
|
||||
};
|
||||
|
||||
// Selector for correct templated Draw front-end function
|
||||
PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative)
|
||||
{
|
||||
return TemplateArgUnroller<FEBinTrianglesChooser>::GetFunc(IsConservative);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Bin SIMD points to the backend. Only supports point size of 1
|
||||
|
|
|
|||
|
|
@ -30,21 +30,6 @@
|
|||
#include "context.h"
|
||||
#include <type_traits>
|
||||
|
||||
INLINE
|
||||
__m128i fpToFixedPoint(const __m128 vIn)
|
||||
{
|
||||
__m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));
|
||||
return _mm_cvtps_epi32(vFixed);
|
||||
}
|
||||
|
||||
INLINE
|
||||
simdscalari fpToFixedPointVertical(const simdscalar vIn)
|
||||
{
|
||||
simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(FIXED_POINT_SCALE));
|
||||
return _simd_cvtps_epi32(vFixed);
|
||||
}
|
||||
|
||||
|
||||
// Calculates the A and B coefficients for the 3 edges of the triangle
|
||||
//
|
||||
// maths for edge equations:
|
||||
|
|
@ -271,31 +256,6 @@ A = _mm_shuffle_ps(A, B, 1 0 1 0)
|
|||
|
||||
}
|
||||
|
||||
INLINE
|
||||
void calcBoundingBoxIntVertical(const simdscalari (&vX)[3], const simdscalari (&vY)[3], simdBBox &bbox)
|
||||
{
|
||||
simdscalari vMinX = vX[0];
|
||||
vMinX = _simd_min_epi32(vMinX, vX[1]);
|
||||
vMinX = _simd_min_epi32(vMinX, vX[2]);
|
||||
|
||||
simdscalari vMaxX = vX[0];
|
||||
vMaxX = _simd_max_epi32(vMaxX, vX[1]);
|
||||
vMaxX = _simd_max_epi32(vMaxX, vX[2]);
|
||||
|
||||
simdscalari vMinY = vY[0];
|
||||
vMinY = _simd_min_epi32(vMinY, vY[1]);
|
||||
vMinY = _simd_min_epi32(vMinY, vY[2]);
|
||||
|
||||
simdscalari vMaxY = vY[0];
|
||||
vMaxY = _simd_max_epi32(vMaxY, vY[1]);
|
||||
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
|
||||
|
||||
bbox.left = vMinX;
|
||||
bbox.right = vMaxX;
|
||||
bbox.top = vMinY;
|
||||
bbox.bottom = vMaxY;
|
||||
}
|
||||
|
||||
INLINE
|
||||
bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
|
||||
{
|
||||
|
|
@ -334,8 +294,9 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uin
|
|||
void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
|
||||
PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
|
||||
|
||||
struct PA_STATE_BASE; // forward decl
|
||||
void BinTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector tri[3], uint32_t primMask, simdscalari primID);
|
||||
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
|
||||
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
|
||||
|
||||
|
|
|
|||
|
|
@ -28,8 +28,16 @@
|
|||
#pragma once
|
||||
|
||||
#include "context.h"
|
||||
#include <type_traits>
|
||||
|
||||
extern PFN_WORK_FUNC gRasterizerTable[2][SWR_MULTISAMPLE_TYPE_MAX];
|
||||
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
|
||||
// Converts four packed FP32 values to fixed point: every lane is multiplied
// by FIXED_POINT_SCALE and then converted to int32.
INLINE
__m128i fpToFixedPoint(const __m128 vIn)
{
    const __m128 vScale = _mm_set1_ps(FIXED_POINT_SCALE);
    return _mm_cvtps_epi32(_mm_mul_ps(vIn, vScale));
}
|
||||
|
|
@ -909,6 +909,7 @@ struct SWR_RASTSTATE
|
|||
uint32_t forcedSampleCount : 1;
|
||||
uint32_t pixelOffset : 1;
|
||||
uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
|
||||
uint32_t conservativeRast : 1;
|
||||
|
||||
float pointSize;
|
||||
float lineWidth;
|
||||
|
|
@ -989,6 +990,7 @@ enum SWR_INPUT_COVERAGE
|
|||
{
|
||||
SWR_INPUT_COVERAGE_NONE,
|
||||
SWR_INPUT_COVERAGE_NORMAL,
|
||||
SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
|
||||
SWR_INPUT_COVERAGE_MAX,
|
||||
};
|
||||
|
||||
|
|
@ -1016,7 +1018,7 @@ struct SWR_PS_STATE
|
|||
|
||||
// dword 2
|
||||
uint32_t killsPixel : 1; // pixel shader can kill pixels
|
||||
uint32_t inputCoverage : 1; // type of input coverage PS uses
|
||||
uint32_t inputCoverage : 1; // ps uses input coverage
|
||||
uint32_t writesODepth : 1; // pixel shader writes to depth
|
||||
uint32_t usesSourceDepth : 1; // pixel shader reads depth
|
||||
uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
|
||||
|
|
|
|||
|
|
@ -849,6 +849,36 @@ struct TemplateArgUnroller
|
|||
|
||||
return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
|
||||
}
|
||||
|
||||
// Last Arg Terminator
|
||||
template <typename... TArgsT>
|
||||
static typename TermT::FuncType GetFunc(uint32_t iArg)
|
||||
{
|
||||
switch(iArg)
|
||||
{
|
||||
case 0: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 0>>();
|
||||
case 1: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 1>>();
|
||||
case 2: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 2>>();
|
||||
case 3: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 3>>();
|
||||
case 4: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 4>>();
|
||||
default: SWR_ASSUME(false); return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static typename TermT::FuncType GetFunc(uint32_t iArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(iArg)
|
||||
{
|
||||
case 0: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 0>>::GetFunc(remainingArgs...);
|
||||
case 1: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 1>>::GetFunc(remainingArgs...);
|
||||
case 2: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 2>>::GetFunc(remainingArgs...);
|
||||
case 3: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 3>>::GetFunc(remainingArgs...);
|
||||
case 4: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 4>>::GetFunc(remainingArgs...);
|
||||
default: SWR_ASSUME(false); return nullptr;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue