mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 02:40:11 +01:00
swr: [rasterizer jitter] implement InstanceID/VertexID in fetch jit
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
7fc4a82007
commit
9a2a4ecb39
2 changed files with 336 additions and 173 deletions
|
|
@ -61,13 +61,14 @@ struct FetchJit : public Builder
|
|||
Value* GetSimdValid8bitIndices(Value* vIndices, Value* pLastIndex);
|
||||
|
||||
// package up Shuffle*bpcGatherd args into a tuple for convenience
|
||||
typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const ConversionType,
|
||||
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
|
||||
const uint32_t (&)[4]> Shuffle8bpcArgs;
|
||||
typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const ConversionType,
|
||||
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
|
||||
const uint32_t(&)[4], Value*, bool, uint32_t, bool, uint32_t> Shuffle8bpcArgs;
|
||||
void Shuffle8bpcGatherd(Shuffle8bpcArgs &args);
|
||||
|
||||
typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const ConversionType,
|
||||
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs;
|
||||
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
|
||||
Value*, bool, uint32_t, bool, uint32_t> Shuffle16bpcArgs;
|
||||
void Shuffle16bpcGather(Shuffle16bpcArgs &args);
|
||||
|
||||
void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
|
||||
|
|
@ -226,7 +227,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
/// @brief Loads attributes from memory using LOADs, shuffling the
|
||||
/// components into SOA form.
|
||||
/// *Note* currently does not support component control,
|
||||
/// component packing, or instancing
|
||||
/// component packing, instancing, InstanceID SGVs, or VertexID SGVs
|
||||
/// @param fetchState - info about attributes to be fetched from memory
|
||||
/// @param streams - value pointer to the current vertex stream
|
||||
/// @param vIndices - vector value of indices to load
|
||||
|
|
@ -786,6 +787,23 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f
|
|||
CreateGatherOddFormats((SWR_FORMAT)ied.Format, pStreamBase, vOffsets, pResults);
|
||||
ConvertFormat((SWR_FORMAT)ied.Format, pResults);
|
||||
|
||||
// check for InstanceID SGV
|
||||
if (fetchState.InstanceIdEnable && (fetchState.InstanceIdElementOffset == nInputElt))
|
||||
{
|
||||
SWR_ASSERT(fetchState.InstanceIdComponentNumber < (sizeof(pResults) / sizeof(pResults[0])));
|
||||
|
||||
// Load a SIMD of InstanceIDs
|
||||
pResults[fetchState.InstanceIdComponentNumber] = VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); // InstanceID
|
||||
}
|
||||
// check for VertexID SGV
|
||||
else if (fetchState.VertexIdEnable && (fetchState.VertexIdElementOffset == nInputElt))
|
||||
{
|
||||
SWR_ASSERT(fetchState.VertexIdComponentNumber < (sizeof(pResults) / sizeof(pResults[0])));
|
||||
|
||||
// Load a SIMD of VertexIDs
|
||||
pResults[fetchState.VertexIdComponentNumber] = LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
}
|
||||
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, pResults);
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
|
|
@ -832,8 +850,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f
|
|||
|
||||
// if we have at least one component to shuffle into place
|
||||
if(compMask){
|
||||
const bool instanceIdEnable = (fetchState.InstanceIdEnable) && (fetchState.InstanceIdElementOffset == nInputElt);
|
||||
const bool vertexIdEnable = (fetchState.VertexIdEnable) && (fetchState.VertexIdElementOffset == nInputElt);
|
||||
|
||||
Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, Instruction::CastOps::FPExt, CONVERT_NONE,
|
||||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
|
||||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, fetchInfo, instanceIdEnable,
|
||||
fetchState.InstanceIdComponentNumber, vertexIdEnable, fetchState.VertexIdComponentNumber);
|
||||
|
||||
// Shuffle gathered components into place in simdvertex struct
|
||||
Shuffle16bpcGather(args); // outputs to vVertexElements ref
|
||||
}
|
||||
|
|
@ -841,30 +864,43 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f
|
|||
break;
|
||||
case 32:
|
||||
{
|
||||
for(uint32_t i = 0; i < 4; i++)
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
{
|
||||
if(!isComponentEnabled(compMask, i)){
|
||||
// offset base to the next component in the vertex to gather
|
||||
pStreamBase = GEP(pStreamBase, C((char)4));
|
||||
continue;
|
||||
}
|
||||
if (isComponentEnabled(compMask, i))
|
||||
{
|
||||
// check for InstanceID SGV
|
||||
if ((fetchState.InstanceIdEnable) && (fetchState.InstanceIdElementOffset == nInputElt) && (fetchState.InstanceIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of InstanceIDs
|
||||
vVertexElements[currentVertexElement++] = VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); // InstanceID
|
||||
}
|
||||
// check for VertexID SGV
|
||||
else if ((fetchState.VertexIdEnable) && (fetchState.VertexIdElementOffset == nInputElt) && (fetchState.VertexIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of VertexIDs
|
||||
vVertexElements[currentVertexElement++] = LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
}
|
||||
// if we need to gather the component
|
||||
else if (compCtrl[i] == StoreSrc)
|
||||
{
|
||||
// save mask as it is zero'd out after each gather
|
||||
Value *vMask = vGatherMask;
|
||||
|
||||
// if we need to gather the component
|
||||
if(compCtrl[i] == StoreSrc){
|
||||
// save mask as it is zero'd out after each gather
|
||||
Value *vMask = vGatherMask;
|
||||
// Gather a SIMD of vertices
|
||||
vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
|
||||
}
|
||||
else
|
||||
{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
|
||||
// Gather a SIMD of vertices
|
||||
vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
|
||||
}
|
||||
else{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
if (currentVertexElement > 3)
|
||||
{
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
|
||||
if(currentVertexElement > 3){
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
|
||||
// offset base to the next component in the vertex to gather
|
||||
|
|
@ -918,14 +954,20 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f
|
|||
case 8:
|
||||
{
|
||||
// if we have at least one component to fetch
|
||||
if(compMask){
|
||||
if(compMask)
|
||||
{
|
||||
Value* vGatherResult = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask, C((char)1));
|
||||
// e.g. result of an 8x32bit integer gather for 8bit components
|
||||
// 256i - 0 1 2 3 4 5 6 7
|
||||
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
|
||||
|
||||
const bool instanceIdEnable = fetchState.InstanceIdEnable && (fetchState.InstanceIdElementOffset == nInputElt);
|
||||
const bool vertexIdEnable = fetchState.VertexIdEnable && (fetchState.VertexIdElementOffset == nInputElt);
|
||||
|
||||
Shuffle8bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
|
||||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, info.swizzle);
|
||||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, info.swizzle, fetchInfo,
|
||||
instanceIdEnable, fetchState.InstanceIdComponentNumber, vertexIdEnable, fetchState.VertexIdComponentNumber);
|
||||
|
||||
// Shuffle gathered components into place in simdvertex struct
|
||||
Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
|
||||
}
|
||||
|
|
@ -963,8 +1005,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f
|
|||
|
||||
// if we have at least one component to shuffle into place
|
||||
if(compMask){
|
||||
const bool instanceIdEnable = fetchState.InstanceIdEnable && (fetchState.InstanceIdElementOffset == nInputElt);
|
||||
const bool vertexIdEnable = fetchState.VertexIdEnable && (fetchState.VertexIdElementOffset == nInputElt);
|
||||
|
||||
Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
|
||||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
|
||||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, fetchInfo, instanceIdEnable,
|
||||
fetchState.InstanceIdComponentNumber, vertexIdEnable, fetchState.VertexIdComponentNumber);
|
||||
|
||||
// Shuffle gathered components into place in simdvertex struct
|
||||
Shuffle16bpcGather(args); // outputs to vVertexElements ref
|
||||
}
|
||||
|
|
@ -975,33 +1022,46 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f
|
|||
SWR_ASSERT(conversionType == CONVERT_NONE);
|
||||
|
||||
// Gathered components into place in simdvertex struct
|
||||
for(uint32_t i = 0; i < 4; i++)
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
{
|
||||
if(!isComponentEnabled(compMask, i)){
|
||||
// offset base to the next component in the vertex to gather
|
||||
pStreamBase = GEP(pStreamBase, C((char)4));
|
||||
continue;
|
||||
}
|
||||
if (isComponentEnabled(compMask, i))
|
||||
{
|
||||
// check for InstanceID SGV
|
||||
if (fetchState.InstanceIdEnable && (fetchState.InstanceIdElementOffset == nInputElt) && (fetchState.InstanceIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of InstanceIDs
|
||||
vVertexElements[currentVertexElement++] = VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); // InstanceID
|
||||
}
|
||||
// check for VertexID SGV
|
||||
else if (fetchState.VertexIdEnable && (fetchState.VertexIdElementOffset == nInputElt) && (fetchState.VertexIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of VertexIDs
|
||||
vVertexElements[currentVertexElement++] = LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
}
|
||||
// if we need to gather the component
|
||||
else if (compCtrl[i] == StoreSrc)
|
||||
{
|
||||
// save mask as it is zero'd out after each gather
|
||||
Value *vMask = vGatherMask;
|
||||
|
||||
// if we need to gather the component
|
||||
if(compCtrl[i] == StoreSrc){
|
||||
// save mask as it is zero'd out after each gather
|
||||
Value *vMask = vGatherMask;
|
||||
vVertexElements[currentVertexElement++] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
|
||||
|
||||
vVertexElements[currentVertexElement++] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
|
||||
// e.g. result of a single 8x32bit integer gather for 32bit components
|
||||
// 256i - 0 1 2 3 4 5 6 7
|
||||
// xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
|
||||
}
|
||||
else
|
||||
{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
|
||||
// e.g. result of a single 8x32bit integer gather for 32bit components
|
||||
// 256i - 0 1 2 3 4 5 6 7
|
||||
// xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
|
||||
}
|
||||
else{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
if (currentVertexElement > 3)
|
||||
{
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
|
||||
if(currentVertexElement > 3){
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
|
||||
// offset base to the next component in the vertex to gather
|
||||
|
|
@ -1140,6 +1200,11 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
|
|||
/// @param compCtrl - component control val
|
||||
/// @param vVertexElements[4] - vertex components to output
|
||||
/// @param swizzle[4] - component swizzle location
|
||||
/// @param fetchInfo - fetch shader info
|
||||
/// @param instanceIdEnable - InstanceID enabled?
|
||||
/// @param instanceIdComponentNumber - InstanceID component override
|
||||
/// @param vertexIdEnable - VertexID enabled?
|
||||
/// @param vertexIdComponentNumber - VertexID component override
|
||||
void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
||||
{
|
||||
// Unpack tuple args
|
||||
|
|
@ -1153,6 +1218,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
|||
const ComponentControl (&compCtrl)[4] = std::get<7>(args);
|
||||
Value* (&vVertexElements)[4] = std::get<8>(args);
|
||||
const uint32_t (&swizzle)[4] = std::get<9>(args);
|
||||
Value *fetchInfo = std::get<10>(args);
|
||||
const bool instanceIdEnable = std::get<11>(args);
|
||||
const uint32_t instanceIdComponentNumber = std::get<12>(args);
|
||||
const bool vertexIdEnable = std::get<13>(args);
|
||||
const uint32_t vertexIdComponentNumber = std::get<14>(args);
|
||||
|
||||
// cast types
|
||||
Type* vGatherTy = mSimdInt32Ty;
|
||||
|
|
@ -1219,34 +1289,50 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
|||
}
|
||||
|
||||
// sign extend all enabled components. If we have a full vVertexElements, output to current simdvertex
|
||||
for(uint32_t i = 0; i < 4; i++){
|
||||
if(!isComponentEnabled(compMask, i)){
|
||||
continue;
|
||||
}
|
||||
|
||||
if(compCtrl[i] == ComponentControl::StoreSrc){
|
||||
// if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
|
||||
uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
|
||||
// if x or y, use vi128XY permute result, else use vi128ZW
|
||||
Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
|
||||
|
||||
// sign extend
|
||||
vVertexElements[currentVertexElement] = PMOVSXBD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v16x8Ty));
|
||||
|
||||
// denormalize if needed
|
||||
if(conversionType != CONVERT_NONE){
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
{
|
||||
if (isComponentEnabled(compMask, i))
|
||||
{
|
||||
// check for InstanceID SGV
|
||||
if (instanceIdEnable && (instanceIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of InstanceIDs
|
||||
vVertexElements[currentVertexElement++] = VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); // InstanceID
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
// check for VertexID SGV
|
||||
else if (vertexIdEnable && (vertexIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of VertexIDs
|
||||
vVertexElements[currentVertexElement++] = LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
}
|
||||
else if (compCtrl[i] == ComponentControl::StoreSrc)
|
||||
{
|
||||
// if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
|
||||
uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
|
||||
// if x or y, use vi128XY permute result, else use vi128ZW
|
||||
Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
|
||||
|
||||
if(currentVertexElement > 3){
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
// sign extend
|
||||
vVertexElements[currentVertexElement] = PMOVSXBD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v16x8Ty));
|
||||
|
||||
// denormalize if needed
|
||||
if (conversionType != CONVERT_NONE)
|
||||
{
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else
|
||||
{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
{
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1278,59 +1364,76 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
|||
}
|
||||
|
||||
// shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
|
||||
for(uint32_t i = 0; i < 4; i++){
|
||||
if(!isComponentEnabled(compMask, i)){
|
||||
continue;
|
||||
}
|
||||
|
||||
if(compCtrl[i] == ComponentControl::StoreSrc){
|
||||
// pshufb masks for each component
|
||||
Value* vConstMask;
|
||||
switch(swizzle[i]){
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
{
|
||||
if (isComponentEnabled(compMask, i))
|
||||
{
|
||||
// check for InstanceID SGV
|
||||
if (instanceIdEnable && (instanceIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of InstanceIDs
|
||||
vVertexElements[currentVertexElement++] = VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); // InstanceID
|
||||
}
|
||||
// check for VertexID SGV
|
||||
else if (vertexIdEnable && (vertexIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of VertexIDs
|
||||
vVertexElements[currentVertexElement++] = LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
}
|
||||
else if (compCtrl[i] == ComponentControl::StoreSrc)
|
||||
{
|
||||
// pshufb masks for each component
|
||||
Value* vConstMask;
|
||||
switch (swizzle[i])
|
||||
{
|
||||
case 0:
|
||||
// x shuffle mask
|
||||
vConstMask = C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
|
||||
0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
|
||||
vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
|
||||
0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
|
||||
break;
|
||||
case 1:
|
||||
// y shuffle mask
|
||||
vConstMask = C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
|
||||
1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
|
||||
vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
|
||||
1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
|
||||
break;
|
||||
case 2:
|
||||
// z shuffle mask
|
||||
vConstMask = C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
|
||||
2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
|
||||
vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
|
||||
2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
|
||||
break;
|
||||
case 3:
|
||||
// w shuffle mask
|
||||
vConstMask = C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
|
||||
3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
|
||||
vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
|
||||
3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
|
||||
break;
|
||||
default:
|
||||
vConstMask = nullptr;
|
||||
break;
|
||||
}
|
||||
|
||||
vVertexElements[currentVertexElement] = BITCAST(PSHUFB(BITCAST(vGatherResult, v32x8Ty), vConstMask), vGatherTy);
|
||||
// after pshufb for x channel
|
||||
// 256i - 0 1 2 3 4 5 6 7
|
||||
// x000 x000 x000 x000 x000 x000 x000 x000
|
||||
|
||||
// denormalize if needed
|
||||
if (conversionType != CONVERT_NONE)
|
||||
{
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else
|
||||
{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
|
||||
vVertexElements[currentVertexElement] = BITCAST(PSHUFB(BITCAST(vGatherResult, v32x8Ty), vConstMask), vGatherTy);
|
||||
// after pshufb for x channel
|
||||
// 256i - 0 1 2 3 4 5 6 7
|
||||
// x000 x000 x000 x000 x000 x000 x000 x000
|
||||
|
||||
// denormalize if needed
|
||||
if (conversionType != CONVERT_NONE){
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
if (currentVertexElement > 3)
|
||||
{
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
|
||||
if(currentVertexElement > 3){
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1354,6 +1457,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
|||
/// @param compMask - component packing mask
|
||||
/// @param compCtrl - component control val
|
||||
/// @param vVertexElements[4] - vertex components to output
|
||||
/// @param fetchInfo - fetch shader info
|
||||
/// @param instanceIdEnable - InstanceID enabled?
|
||||
/// @param instanceIdComponentNumber - InstanceID component override
|
||||
/// @param vertexIdEnable - VertexID enabled?
|
||||
/// @param vertexIdComponentNumber - VertexID component override
|
||||
void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
|
||||
{
|
||||
// Unpack tuple args
|
||||
|
|
@ -1366,6 +1474,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
|
|||
const ComponentEnable compMask = std::get<6>(args);
|
||||
const ComponentControl(&compCtrl)[4] = std::get<7>(args);
|
||||
Value* (&vVertexElements)[4] = std::get<8>(args);
|
||||
Value *fetchInfo = std::get<9>(args);
|
||||
const bool instanceIdEnable = std::get<10>(args);
|
||||
const uint32_t instanceIdComponentNumber = std::get<11>(args);
|
||||
const bool vertexIdEnable = std::get<12>(args);
|
||||
const uint32_t vertexIdComponentNumber = std::get<13>(args);
|
||||
|
||||
// cast types
|
||||
Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
|
||||
|
|
@ -1429,43 +1542,57 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
|
|||
}
|
||||
|
||||
// sign extend all enabled components. If we have a full vVertexElements, output to current simdvertex
|
||||
for(uint32_t i = 0; i < 4; i++){
|
||||
if(!isComponentEnabled(compMask, i)){
|
||||
continue;
|
||||
}
|
||||
|
||||
if(compCtrl[i] == ComponentControl::StoreSrc){
|
||||
// if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
|
||||
uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
|
||||
// if x or y, use vi128XY permute result, else use vi128ZW
|
||||
Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
|
||||
|
||||
if(bFP) {
|
||||
// extract 128 bit lanes to sign extend each component
|
||||
vVertexElements[currentVertexElement] = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
{
|
||||
if (isComponentEnabled(compMask, i))
|
||||
{
|
||||
// check for InstanceID SGV
|
||||
if (instanceIdEnable && (instanceIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of InstanceIDs
|
||||
vVertexElements[currentVertexElement++] = VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); // InstanceID
|
||||
}
|
||||
else {
|
||||
// extract 128 bit lanes to sign extend each component
|
||||
vVertexElements[currentVertexElement] = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
|
||||
// check for VertexID SGV
|
||||
else if (vertexIdEnable && (vertexIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of VertexIDs
|
||||
vVertexElements[currentVertexElement++] = LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
}
|
||||
else if (compCtrl[i] == ComponentControl::StoreSrc)
|
||||
{
|
||||
// if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
|
||||
uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
|
||||
// if x or y, use vi128XY permute result, else use vi128ZW
|
||||
Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
|
||||
|
||||
// denormalize if needed
|
||||
if(conversionType != CONVERT_NONE){
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(IntToFpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
if (bFP) {
|
||||
// extract 128 bit lanes to sign extend each component
|
||||
vVertexElements[currentVertexElement] = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
|
||||
}
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
else {
|
||||
// extract 128 bit lanes to sign extend each component
|
||||
vVertexElements[currentVertexElement] = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
|
||||
|
||||
if(currentVertexElement > 3){
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
// denormalize if needed
|
||||
if (conversionType != CONVERT_NONE) {
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(IntToFpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
}
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else
|
||||
{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
{
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
// else zero extend
|
||||
else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
|
||||
|
|
@ -1509,36 +1636,52 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
|
|||
}
|
||||
|
||||
// shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
|
||||
for(uint32_t i = 0; i < 4; i++){
|
||||
if(!isComponentEnabled(compMask, i)){
|
||||
continue;
|
||||
}
|
||||
|
||||
if(compCtrl[i] == ComponentControl::StoreSrc){
|
||||
// select correct constMask for x/z or y/w pshufb
|
||||
uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
|
||||
// if x or y, use vi128XY permute result, else use vi128ZW
|
||||
uint32_t selectedGather = (i < 2) ? 0 : 1;
|
||||
|
||||
vVertexElements[currentVertexElement] = BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
|
||||
// after pshufb mask for x channel; z uses the same shuffle from the second gather
|
||||
// 256i - 0 1 2 3 4 5 6 7
|
||||
// xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
|
||||
|
||||
// denormalize if needed
|
||||
if(conversionType != CONVERT_NONE){
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
{
|
||||
if (isComponentEnabled(compMask, i))
|
||||
{
|
||||
// check for InstanceID SGV
|
||||
if (instanceIdEnable && (instanceIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of InstanceIDs
|
||||
vVertexElements[currentVertexElement++] = VBROADCAST(LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))); // InstanceID
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
// check for VertexID SGV
|
||||
else if (vertexIdEnable && (vertexIdComponentNumber == currentVertexElement))
|
||||
{
|
||||
// Load a SIMD of VertexIDs
|
||||
vVertexElements[currentVertexElement++] = LOAD(GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
}
|
||||
else if (compCtrl[i] == ComponentControl::StoreSrc)
|
||||
{
|
||||
// select correct constMask for x/z or y/w pshufb
|
||||
uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
|
||||
// if x or y, use the first gather result (vGatherResult[0]), else use the second (vGatherResult[1])
|
||||
uint32_t selectedGather = (i < 2) ? 0 : 1;
|
||||
|
||||
if(currentVertexElement > 3){
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
vVertexElements[currentVertexElement] = BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
|
||||
// after pshufb mask for x channel; z uses the same shuffle from the second gather
|
||||
// 256i - 0 1 2 3 4 5 6 7
|
||||
// xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
|
||||
|
||||
// denormalize if needed
|
||||
if (conversionType != CONVERT_NONE)
|
||||
{
|
||||
vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
|
||||
}
|
||||
currentVertexElement++;
|
||||
}
|
||||
else
|
||||
{
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
{
|
||||
StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
|
||||
// reset to the next vVertexElement to output
|
||||
currentVertexElement = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -97,13 +97,20 @@ struct FETCH_COMPILE_STATE
|
|||
SWR_FORMAT indexType;
|
||||
uint32_t cutIndex{ 0xffffffff };
|
||||
|
||||
bool InstanceIdEnable;
|
||||
uint32_t InstanceIdElementOffset;
|
||||
uint32_t InstanceIdComponentNumber;
|
||||
bool VertexIdEnable;
|
||||
uint32_t VertexIdElementOffset;
|
||||
uint32_t VertexIdComponentNumber;
|
||||
|
||||
// Options that affect the JIT'd code
|
||||
bool bDisableVGATHER; // if enabled, FetchJit will generate loads/shuffles instead of VGATHERs
|
||||
bool bDisableIndexOOBCheck; // if enabled, FetchJit will exclude index OOB check
|
||||
bool bEnableCutIndex{ false }; // compares indices with the cut index and returns a cut mask
|
||||
|
||||
FETCH_COMPILE_STATE(bool useVGATHER = false, bool indexOOBCheck = false) :
|
||||
bDisableVGATHER(useVGATHER), bDisableIndexOOBCheck(indexOOBCheck){};
|
||||
FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck = false):
|
||||
bDisableVGATHER(disableVGATHER), bDisableIndexOOBCheck(diableIndexOOBCheck){ };
|
||||
|
||||
bool operator==(const FETCH_COMPILE_STATE &other) const
|
||||
{
|
||||
|
|
@ -114,6 +121,19 @@ struct FETCH_COMPILE_STATE
|
|||
if (bEnableCutIndex != other.bEnableCutIndex) return false;
|
||||
if (cutIndex != other.cutIndex) return false;
|
||||
|
||||
if (InstanceIdEnable != other.InstanceIdEnable) return false;
|
||||
if (InstanceIdEnable)
|
||||
{
|
||||
if (InstanceIdComponentNumber != other.InstanceIdComponentNumber) return false;
|
||||
if (InstanceIdElementOffset != other.InstanceIdElementOffset) return false;
|
||||
}
|
||||
if (VertexIdEnable != other.VertexIdEnable) return false;
|
||||
if (VertexIdEnable)
|
||||
{
|
||||
if (VertexIdComponentNumber != other.VertexIdComponentNumber) return false;
|
||||
if (VertexIdElementOffset != other.VertexIdElementOffset) return false;
|
||||
}
|
||||
|
||||
for(uint32_t i = 0; i < numAttribs; ++i)
|
||||
{
|
||||
if((layout[i].bits != other.layout[i].bits) ||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue