mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 06:50:11 +01:00
swr: [rasterizer core/common/jitter] gl_double support
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99214 Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
b6670157d7
commit
33fa4c99f7
9 changed files with 341 additions and 33 deletions
|
|
@ -149,16 +149,26 @@ const SWR_FORMAT_INFO gFormatInfo[] = {
|
||||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
||||||
1, 1
|
1, 1
|
||||||
},
|
},
|
||||||
// padding (0x5)
|
// R64G64_FLOAT (0x5)
|
||||||
{
|
{
|
||||||
nullptr,
|
"R64G64_FLOAT",
|
||||||
{ SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
|
{ SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
|
||||||
{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
|
{ 0, 0, 0, 0x3f800000 }, // Defaults for missing components
|
||||||
0, 0, 0, false, false, false, false,
|
{ 0, 1, 0, 0 }, // Swizzle
|
||||||
{ false, false, false, false },
|
{ 64, 64, 0, 0 }, // Bits per component
|
||||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
128, // Bits per element
|
||||||
1, 1
|
16, // Bytes per element
|
||||||
|
2, // Num components
|
||||||
|
false, // isSRGB
|
||||||
|
false, // isBC
|
||||||
|
false, // isSubsampled
|
||||||
|
false, // isLuminance
|
||||||
|
{ false, false, false, false }, // Is normalized?
|
||||||
|
{ 1.0f, 1.0f, 0, 0 }, // To float scale factor
|
||||||
|
1, // bcWidth
|
||||||
|
1, // bcHeight
|
||||||
},
|
},
|
||||||
|
|
||||||
// R32G32B32X32_FLOAT (0x6)
|
// R32G32B32X32_FLOAT (0x6)
|
||||||
{
|
{
|
||||||
"R32G32B32X32_FLOAT",
|
"R32G32B32X32_FLOAT",
|
||||||
|
|
@ -1719,16 +1729,26 @@ const SWR_FORMAT_INFO gFormatInfo[] = {
|
||||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
||||||
1, 1
|
1, 1
|
||||||
},
|
},
|
||||||
// padding (0x8D)
|
// R64_FLOAT (0x8D)
|
||||||
{
|
{
|
||||||
nullptr,
|
"R64_FLOAT",
|
||||||
{ SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
|
{ SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
|
||||||
{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
|
{ 0, 0, 0, 0x3f800000 }, // Defaults for missing components
|
||||||
0, 0, 0, false, false, false, false,
|
{ 0, 0, 0, 0 }, // Swizzle
|
||||||
{ false, false, false, false },
|
{ 64, 0, 0, 0 }, // Bits per component
|
||||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
64, // Bits per element
|
||||||
1, 1
|
8, // Bytes per element
|
||||||
|
1, // Num components
|
||||||
|
false, // isSRGB
|
||||||
|
false, // isBC
|
||||||
|
false, // isSubsampled
|
||||||
|
false, // isLuminance
|
||||||
|
{ false, false, false, false }, // Is normalized?
|
||||||
|
{ 1.0f, 0, 0, 0 }, // To float scale factor
|
||||||
|
1, // bcWidth
|
||||||
|
1, // bcHeight
|
||||||
},
|
},
|
||||||
|
|
||||||
// R16G16B16X16_UNORM (0x8E)
|
// R16G16B16X16_UNORM (0x8E)
|
||||||
{
|
{
|
||||||
"R16G16B16X16_UNORM",
|
"R16G16B16X16_UNORM",
|
||||||
|
|
@ -5529,26 +5549,46 @@ const SWR_FORMAT_INFO gFormatInfo[] = {
|
||||||
1, // bcHeight
|
1, // bcHeight
|
||||||
},
|
},
|
||||||
|
|
||||||
// padding (0x197)
|
// R64G64B64A64_FLOAT (0x197)
|
||||||
{
|
{
|
||||||
nullptr,
|
"R64G64B64A64_FLOAT",
|
||||||
{ SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
|
{ SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT },
|
||||||
{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
|
{ 0, 0, 0, 0x3f800000 }, // Defaults for missing components
|
||||||
0, 0, 0, false, false, false, false,
|
{ 0, 1, 2, 3 }, // Swizzle
|
||||||
{ false, false, false, false },
|
{ 64, 64, 64, 64 }, // Bits per component
|
||||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
256, // Bits per element
|
||||||
1, 1
|
32, // Bytes per element
|
||||||
|
4, // Num components
|
||||||
|
false, // isSRGB
|
||||||
|
false, // isBC
|
||||||
|
false, // isSubsampled
|
||||||
|
false, // isLuminance
|
||||||
|
{ false, false, false, false }, // Is normalized?
|
||||||
|
{ 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
|
||||||
|
1, // bcWidth
|
||||||
|
1, // bcHeight
|
||||||
},
|
},
|
||||||
// padding (0x198)
|
|
||||||
|
// R64G64B64_FLOAT (0x198)
|
||||||
{
|
{
|
||||||
nullptr,
|
"R64G64B64_FLOAT",
|
||||||
{ SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
|
{ SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
|
||||||
{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
|
{ 0, 0, 0, 0x3f800000 }, // Defaults for missing components
|
||||||
0, 0, 0, false, false, false, false,
|
{ 0, 1, 2, 0 }, // Swizzle
|
||||||
{ false, false, false, false },
|
{ 64, 64, 64, 0 }, // Bits per component
|
||||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
192, // Bits per element
|
||||||
1, 1
|
24, // Bytes per element
|
||||||
|
3, // Num components
|
||||||
|
false, // isSRGB
|
||||||
|
false, // isBC
|
||||||
|
false, // isSubsampled
|
||||||
|
false, // isLuminance
|
||||||
|
{ false, false, false, false }, // Is normalized?
|
||||||
|
{ 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
|
||||||
|
1, // bcWidth
|
||||||
|
1, // bcHeight
|
||||||
},
|
},
|
||||||
|
|
||||||
// BC4_SNORM (0x199)
|
// BC4_SNORM (0x199)
|
||||||
{
|
{
|
||||||
"BC4_SNORM",
|
"BC4_SNORM",
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,7 @@ enum SWR_FORMAT
|
||||||
R32G32B32A32_FLOAT = 0x0,
|
R32G32B32A32_FLOAT = 0x0,
|
||||||
R32G32B32A32_SINT = 0x1,
|
R32G32B32A32_SINT = 0x1,
|
||||||
R32G32B32A32_UINT = 0x2,
|
R32G32B32A32_UINT = 0x2,
|
||||||
|
R64G64_FLOAT = 0x5,
|
||||||
R32G32B32X32_FLOAT = 0x6,
|
R32G32B32X32_FLOAT = 0x6,
|
||||||
R32G32B32A32_SSCALED = 0x7,
|
R32G32B32A32_SSCALED = 0x7,
|
||||||
R32G32B32A32_USCALED = 0x8,
|
R32G32B32A32_USCALED = 0x8,
|
||||||
|
|
@ -78,6 +79,7 @@ enum SWR_FORMAT
|
||||||
R32_FLOAT_X8X24_TYPELESS = 0x88,
|
R32_FLOAT_X8X24_TYPELESS = 0x88,
|
||||||
X32_TYPELESS_G8X24_UINT = 0x89,
|
X32_TYPELESS_G8X24_UINT = 0x89,
|
||||||
L32A32_FLOAT = 0x8A,
|
L32A32_FLOAT = 0x8A,
|
||||||
|
R64_FLOAT = 0x8D,
|
||||||
R16G16B16X16_UNORM = 0x8E,
|
R16G16B16X16_UNORM = 0x8E,
|
||||||
R16G16B16X16_FLOAT = 0x8F,
|
R16G16B16X16_FLOAT = 0x8F,
|
||||||
L32X32_FLOAT = 0x91,
|
L32X32_FLOAT = 0x91,
|
||||||
|
|
@ -193,6 +195,8 @@ enum SWR_FORMAT
|
||||||
R8G8B8_SNORM = 0x194,
|
R8G8B8_SNORM = 0x194,
|
||||||
R8G8B8_SSCALED = 0x195,
|
R8G8B8_SSCALED = 0x195,
|
||||||
R8G8B8_USCALED = 0x196,
|
R8G8B8_USCALED = 0x196,
|
||||||
|
R64G64B64A64_FLOAT = 0x197,
|
||||||
|
R64G64B64_FLOAT = 0x198,
|
||||||
BC4_SNORM = 0x199,
|
BC4_SNORM = 0x199,
|
||||||
BC5_SNORM = 0x19A,
|
BC5_SNORM = 0x19A,
|
||||||
R16G16B16_FLOAT = 0x19B,
|
R16G16B16_FLOAT = 0x19B,
|
||||||
|
|
|
||||||
|
|
@ -133,6 +133,28 @@ template<> struct FormatTraits<R32G32B32A32_UINT> :
|
||||||
typedef Format4<32, 32, 32, 32> FormatT;
|
typedef Format4<32, 32, 32, 32> FormatT;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
/// FormatTraits<R64G64_FLOAT> - Format traits specialization for R64G64_FLOAT
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
template<> struct FormatTraits<R64G64_FLOAT> :
|
||||||
|
ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
|
||||||
|
FormatSwizzle<0, 1>,
|
||||||
|
Defaults<0, 0, 0, 0x3f800000>
|
||||||
|
{
|
||||||
|
static const uint32_t bpp{ 128 };
|
||||||
|
static const uint32_t numComps{ 2 };
|
||||||
|
static const bool hasAlpha{ false };
|
||||||
|
static const uint32_t alphaComp{ 0 };
|
||||||
|
static const bool isSRGB{ false };
|
||||||
|
static const bool isBC{ false };
|
||||||
|
static const bool isSubsampled{ false };
|
||||||
|
static const uint32_t bcWidth{ 1 };
|
||||||
|
static const uint32_t bcHeight{ 1 };
|
||||||
|
|
||||||
|
typedef Transpose64_64 TransposeT;
|
||||||
|
typedef Format2<64, 64> FormatT;
|
||||||
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
/// FormatTraits<R32G32B32X32_FLOAT> - Format traits specialization for R32G32B32X32_FLOAT
|
/// FormatTraits<R32G32B32X32_FLOAT> - Format traits specialization for R32G32B32X32_FLOAT
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
@ -595,6 +617,28 @@ template<> struct FormatTraits<L32A32_FLOAT> :
|
||||||
typedef Format2<32, 32> FormatT;
|
typedef Format2<32, 32> FormatT;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
/// FormatTraits<R64_FLOAT> - Format traits specialization for R64_FLOAT
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
template<> struct FormatTraits<R64_FLOAT> :
|
||||||
|
ComponentTraits<SWR_TYPE_FLOAT, 64>,
|
||||||
|
FormatSwizzle<0>,
|
||||||
|
Defaults<0, 0, 0, 0x3f800000>
|
||||||
|
{
|
||||||
|
static const uint32_t bpp{ 64 };
|
||||||
|
static const uint32_t numComps{ 1 };
|
||||||
|
static const bool hasAlpha{ false };
|
||||||
|
static const uint32_t alphaComp{ 0 };
|
||||||
|
static const bool isSRGB{ false };
|
||||||
|
static const bool isBC{ false };
|
||||||
|
static const bool isSubsampled{ false };
|
||||||
|
static const uint32_t bcWidth{ 1 };
|
||||||
|
static const uint32_t bcHeight{ 1 };
|
||||||
|
|
||||||
|
typedef TransposeSingleComponent<64> TransposeT;
|
||||||
|
typedef Format1<64> FormatT;
|
||||||
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
/// FormatTraits<R16G16B16X16_UNORM> - Format traits specialization for R16G16B16X16_UNORM
|
/// FormatTraits<R16G16B16X16_UNORM> - Format traits specialization for R16G16B16X16_UNORM
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
@ -3125,6 +3169,50 @@ template<> struct FormatTraits<R8G8B8_USCALED> :
|
||||||
typedef Format3<8, 8, 8> FormatT;
|
typedef Format3<8, 8, 8> FormatT;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
/// FormatTraits<R64G64B64A64_FLOAT> - Format traits specialization for R64G64B64A64_FLOAT
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
template<> struct FormatTraits<R64G64B64A64_FLOAT> :
|
||||||
|
ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
|
||||||
|
FormatSwizzle<0, 1, 2, 3>,
|
||||||
|
Defaults<0, 0, 0, 0x3f800000>
|
||||||
|
{
|
||||||
|
static const uint32_t bpp{ 256 };
|
||||||
|
static const uint32_t numComps{ 4 };
|
||||||
|
static const bool hasAlpha{ true };
|
||||||
|
static const uint32_t alphaComp{ 3 };
|
||||||
|
static const bool isSRGB{ false };
|
||||||
|
static const bool isBC{ false };
|
||||||
|
static const bool isSubsampled{ false };
|
||||||
|
static const uint32_t bcWidth{ 1 };
|
||||||
|
static const uint32_t bcHeight{ 1 };
|
||||||
|
|
||||||
|
typedef Transpose64_64_64_64 TransposeT;
|
||||||
|
typedef Format4<64, 64, 64, 64> FormatT;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
/// FormatTraits<R64G64B64_FLOAT> - Format traits specialization for R64G64B64_FLOAT
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
template<> struct FormatTraits<R64G64B64_FLOAT> :
|
||||||
|
ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
|
||||||
|
FormatSwizzle<0, 1, 2>,
|
||||||
|
Defaults<0, 0, 0, 0x3f800000>
|
||||||
|
{
|
||||||
|
static const uint32_t bpp{ 192 };
|
||||||
|
static const uint32_t numComps{ 3 };
|
||||||
|
static const bool hasAlpha{ false };
|
||||||
|
static const uint32_t alphaComp{ 0 };
|
||||||
|
static const bool isSRGB{ false };
|
||||||
|
static const bool isBC{ false };
|
||||||
|
static const bool isSubsampled{ false };
|
||||||
|
static const uint32_t bcWidth{ 1 };
|
||||||
|
static const uint32_t bcHeight{ 1 };
|
||||||
|
|
||||||
|
typedef Transpose64_64_64 TransposeT;
|
||||||
|
typedef Format3<64, 64, 64> FormatT;
|
||||||
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
/// FormatTraits<BC4_SNORM> - Format traits specialization for BC4_SNORM
|
/// FormatTraits<BC4_SNORM> - Format traits specialization for BC4_SNORM
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
||||||
|
|
@ -856,6 +856,70 @@ struct Transpose11_11_10
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
/// Transpose64
/// Transpose type named for a one-component 64-bit layout. Every entry
/// point is declared deleted, so any call through this type is rejected
/// at compile time.
//////////////////////////////////////////////////////////////////////////
struct Transpose64
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief SOA to AOS conversion (deleted - not callable)
    /// @param pSrc - source data in SOA form
    /// @param pDst - output data in AOS form
    static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;

#if ENABLE_AVX512_SIMD16
    /// @brief Transpose_16 counterpart, declared only when
    ///        ENABLE_AVX512_SIMD16 is nonzero (deleted - not callable)
    static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete;
#endif
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
/// Transpose64_64
/// Transpose type named for a two-component 64-bit layout. Every entry
/// point is declared deleted, so any call through this type is rejected
/// at compile time.
//////////////////////////////////////////////////////////////////////////
struct Transpose64_64
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief SOA to AOS conversion (deleted - not callable)
    /// @param pSrc - source data in SOA form
    /// @param pDst - output data in AOS form
    static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;

#if ENABLE_AVX512_SIMD16
    /// @brief Transpose_16 counterpart, declared only when
    ///        ENABLE_AVX512_SIMD16 is nonzero (deleted - not callable)
    static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete;
#endif
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
/// Transpose64_64_64
/// Transpose type named for a three-component 64-bit layout. Every entry
/// point is declared deleted, so any call through this type is rejected
/// at compile time.
//////////////////////////////////////////////////////////////////////////
struct Transpose64_64_64
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief SOA to AOS conversion (deleted - not callable)
    /// @param pSrc - source data in SOA form
    /// @param pDst - output data in AOS form
    static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;

#if ENABLE_AVX512_SIMD16
    /// @brief Transpose_16 counterpart, declared only when
    ///        ENABLE_AVX512_SIMD16 is nonzero (deleted - not callable)
    static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete;
#endif
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
/// Transpose64_64_64_64
/// Transpose type named for a four-component 64-bit layout. Every entry
/// point is declared deleted, so any call through this type is rejected
/// at compile time.
//////////////////////////////////////////////////////////////////////////
struct Transpose64_64_64_64
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief SOA to AOS conversion (deleted - not callable)
    /// @param pSrc - source data in SOA form
    /// @param pDst - output data in AOS form
    static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;

#if ENABLE_AVX512_SIMD16
    /// @brief Transpose_16 counterpart, declared only when
    ///        ENABLE_AVX512_SIMD16 is nonzero (deleted - not callable)
    static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete;
#endif
};
|
||||||
|
|
||||||
// helper function to unroll loops
|
// helper function to unroll loops
|
||||||
template<int Begin, int End, int Step = 1>
|
template<int Begin, int End, int Step = 1>
|
||||||
struct UnrollerL {
|
struct UnrollerL {
|
||||||
|
|
|
||||||
|
|
@ -631,6 +631,55 @@ namespace SwrJit
|
||||||
return vGather;
|
return vGather;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief Generate a masked gather operation in LLVM IR. If not
|
||||||
|
/// supported on the underlying platform, emulate it with loads
|
||||||
|
/// @param vSrc - SIMD wide value that will be loaded if mask is invalid
|
||||||
|
/// @param pBase - Int8* base VB address pointer value
|
||||||
|
/// @param vIndices - SIMD wide value of VB byte offsets
|
||||||
|
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
|
||||||
|
/// @param scale - value to scale indices by
|
||||||
|
Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, Value* scale)
|
||||||
|
{
|
||||||
|
Value* vGather;
|
||||||
|
|
||||||
|
// use avx2 gather instruction if available
|
||||||
|
if(JM()->mArch.AVX2())
|
||||||
|
{
|
||||||
|
vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, scale);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Value* pStack = STACKSAVE();
|
||||||
|
|
||||||
|
// store vSrc on the stack. this way we can select between a valid load address and the vSrc address
|
||||||
|
Value* vSrcPtr = ALLOCA(vSrc->getType());
|
||||||
|
STORE(vSrc, vSrcPtr);
|
||||||
|
|
||||||
|
vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
|
||||||
|
Value *vScaleVec = VECTOR_SPLAT(4, Z_EXT(scale,mInt32Ty));
|
||||||
|
Value *vOffsets = MUL(vIndices,vScaleVec);
|
||||||
|
Value *mask = MASK(vMask);
|
||||||
|
for(uint32_t i = 0; i < mVWidth/2; ++i)
|
||||||
|
{
|
||||||
|
// single component byte index
|
||||||
|
Value *offset = VEXTRACT(vOffsets,C(i));
|
||||||
|
// byte pointer to component
|
||||||
|
Value *loadAddress = GEP(pBase,offset);
|
||||||
|
loadAddress = BITCAST(loadAddress,PointerType::get(mDoubleTy,0));
|
||||||
|
// pointer to the value to load if we're masking off a component
|
||||||
|
Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)});
|
||||||
|
Value *selMask = VEXTRACT(mask,C(i));
|
||||||
|
// switch in a safe address to load if we're trying to access a vertex
|
||||||
|
Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
|
||||||
|
Value *val = LOAD(validAddress);
|
||||||
|
vGather = VINSERT(vGather,val,C(i));
|
||||||
|
}
|
||||||
|
STACKRESTORE(pStack);
|
||||||
|
}
|
||||||
|
return vGather;
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief convert x86 <N x float> mask to llvm <N x i1> mask
|
/// @brief convert x86 <N x float> mask to llvm <N x i1> mask
|
||||||
Value* Builder::MASK(Value* vmask)
|
Value* Builder::MASK(Value* vmask)
|
||||||
|
|
|
||||||
|
|
@ -113,6 +113,8 @@ Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, Value* sc
|
||||||
void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
|
void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
|
||||||
Value* mask, Value* vGatherComponents[], bool bPackedOutput);
|
Value* mask, Value* vGatherComponents[], bool bPackedOutput);
|
||||||
|
|
||||||
|
Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, Value* scale);
|
||||||
|
|
||||||
void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
|
void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
|
||||||
|
|
||||||
void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
|
void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
|
||||||
|
|
|
||||||
|
|
@ -519,7 +519,7 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
|
||||||
bool FetchJit::IsOddFormat(SWR_FORMAT format)
|
bool FetchJit::IsOddFormat(SWR_FORMAT format)
|
||||||
{
|
{
|
||||||
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
|
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
|
||||||
if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32)
|
if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32 && info.bpc[0] != 64)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -914,6 +914,58 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 64:
|
||||||
|
{
|
||||||
|
for (uint32_t i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
if (isComponentEnabled(compMask, i))
|
||||||
|
{
|
||||||
|
// if we need to gather the component
|
||||||
|
if (compCtrl[i] == StoreSrc)
|
||||||
|
{
|
||||||
|
Value *vMaskLo = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3}));
|
||||||
|
Value *vMaskHi = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7}));
|
||||||
|
vMaskLo = S_EXT(vMaskLo, VectorType::get(mInt64Ty, 4));
|
||||||
|
vMaskHi = S_EXT(vMaskHi, VectorType::get(mInt64Ty, 4));
|
||||||
|
vMaskLo = BITCAST(vMaskLo, VectorType::get(mDoubleTy, 4));
|
||||||
|
vMaskHi = BITCAST(vMaskHi, VectorType::get(mDoubleTy, 4));
|
||||||
|
|
||||||
|
Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0));
|
||||||
|
Value *vOffsetsHi = VEXTRACTI128(vOffsets, C(1));
|
||||||
|
|
||||||
|
Value *vZeroDouble = VECTOR_SPLAT(4, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
|
||||||
|
|
||||||
|
Value* pGatherLo = GATHERPD(vZeroDouble,
|
||||||
|
pStreamBase, vOffsetsLo, vMaskLo, C((char)1));
|
||||||
|
Value* pGatherHi = GATHERPD(vZeroDouble,
|
||||||
|
pStreamBase, vOffsetsHi, vMaskHi, C((char)1));
|
||||||
|
|
||||||
|
pGatherLo = VCVTPD2PS(pGatherLo);
|
||||||
|
pGatherHi = VCVTPD2PS(pGatherHi);
|
||||||
|
|
||||||
|
Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, C({0, 1, 2, 3, 4, 5, 6, 7}));
|
||||||
|
|
||||||
|
vVertexElements[currentVertexElement++] = pGather;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentVertexElement > 3)
|
||||||
|
{
|
||||||
|
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||||
|
// reset to the next vVertexElement to output
|
||||||
|
currentVertexElement = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// offset base to the next component in the vertex to gather
|
||||||
|
pStreamBase = GEP(pStreamBase, C((char)8));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
SWR_ASSERT(0, "Tried to fetch invalid FP format");
|
SWR_ASSERT(0, "Tried to fetch invalid FP format");
|
||||||
break;
|
break;
|
||||||
|
|
@ -1730,6 +1782,8 @@ PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const HANDLE hFunc)
|
||||||
fclose(fd);
|
fclose(fd);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
pJitMgr->DumpAsm(const_cast<llvm::Function*>(func), "final");
|
||||||
|
|
||||||
return pfnFetch;
|
return pfnFetch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,7 @@ inst_aliases = {
|
||||||
}
|
}
|
||||||
|
|
||||||
intrinsics = [
|
intrinsics = [
|
||||||
|
["VGATHERPD", "x86_avx2_gather_d_pd_256", ["src", "pBase", "indices", "mask", "scale"]],
|
||||||
["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]],
|
["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]],
|
||||||
["VGATHERDD", "x86_avx2_gather_d_d_256", ["src", "pBase", "indices", "mask", "scale"]],
|
["VGATHERDD", "x86_avx2_gather_d_d_256", ["src", "pBase", "indices", "mask", "scale"]],
|
||||||
["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]],
|
["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]],
|
||||||
|
|
@ -101,6 +102,7 @@ intrinsics = [
|
||||||
["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]],
|
["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]],
|
||||||
["VPERMD", "x86_avx2_permd", ["a", "idx"]],
|
["VPERMD", "x86_avx2_permd", ["a", "idx"]],
|
||||||
["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
|
["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
|
||||||
|
["VCVTPD2PS", "x86_avx_cvt_pd2_ps_256", ["a"]],
|
||||||
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
|
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
|
||||||
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
|
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
|
||||||
["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],
|
["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],
|
||||||
|
|
|
||||||
|
|
@ -537,6 +537,11 @@ mesa_to_swr_format(enum pipe_format format)
|
||||||
{PIPE_FORMAT_R32G32B32_FIXED, R32G32B32_SFIXED},
|
{PIPE_FORMAT_R32G32B32_FIXED, R32G32B32_SFIXED},
|
||||||
{PIPE_FORMAT_R32G32B32A32_FIXED, R32G32B32A32_SFIXED},
|
{PIPE_FORMAT_R32G32B32A32_FIXED, R32G32B32A32_SFIXED},
|
||||||
|
|
||||||
|
{PIPE_FORMAT_R64_FLOAT, R64_FLOAT},
|
||||||
|
{PIPE_FORMAT_R64G64_FLOAT, R64G64_FLOAT},
|
||||||
|
{PIPE_FORMAT_R64G64B64_FLOAT, R64G64B64_FLOAT},
|
||||||
|
{PIPE_FORMAT_R64G64B64A64_FLOAT, R64G64B64A64_FLOAT},
|
||||||
|
|
||||||
/* These formats have entries in SWR but don't have Load/StoreTile
|
/* These formats have entries in SWR but don't have Load/StoreTile
|
||||||
* implementations. That means these aren't renderable, and thus having
|
* implementations. That means these aren't renderable, and thus having
|
||||||
* a mapping entry here is detrimental.
|
* a mapping entry here is detrimental.
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue