amd: update addrlib

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27954>
This commit is contained in:
Marek Olšák 2024-03-04 01:16:08 -05:00 committed by Marge Bot
parent 2cd192f879
commit e01266335b
14 changed files with 1041 additions and 206 deletions

View file

@ -23,8 +23,8 @@ extern "C"
{
#endif
#define ADDRLIB_VERSION_MAJOR 8
#define ADDRLIB_VERSION_MINOR 9
#define ADDRLIB_VERSION_MAJOR 9
#define ADDRLIB_VERSION_MINOR 0
#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR)
/// Virtually all interface functions need ADDR_HANDLE as first parameter
@ -33,6 +33,13 @@ typedef VOID* ADDR_HANDLE;
/// Client handle used in callbacks
typedef VOID* ADDR_CLIENT_HANDLE;
/**
****************************************************************************************************
* ADDR_EXTENT3D
*
* @brief
* Three-component extent made of unsigned 32-bit width, height and depth members.
* @note
* For 2D images the depth member carries the slice count instead.
****************************************************************************************************
*/
typedef struct _ADDR_EXTENT3D
{
UINT_32 width;
UINT_32 height;
UINT_32 depth; // also slices for 2D images
} ADDR_EXTENT3D;
/**
* /////////////////////////////////////////////////////////////////////////////////////////////////
* // Callback functions
@ -44,6 +51,8 @@ typedef VOID* ADDR_CLIENT_HANDLE;
* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
* const ADDR_DEBUGPRINT_INPUT* pInput);
*
**/
/**
* /////////////////////////////////////////////////////////////////////////////////////////////////
* // Create/Destroy/Config functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
@ -78,11 +87,15 @@ typedef VOID* ADDR_CLIENT_HANDLE;
* AddrComputeFmaskAddrFromCoord()
* AddrComputeFmaskCoordFromAddr()
*
**/
/**
* /////////////////////////////////////////////////////////////////////////////////////////////////
* // Element/Utility functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
* ElemFlt32ToDepthPixel()
* ElemFlt32ToColorPixel()
**/
/**
* AddrExtractBankPipeSwizzle()
* AddrCombineBankPipeSwizzle()
* AddrComputeSliceSwizzle()
@ -420,7 +433,6 @@ ADDR_E_RETURNCODE ADDR_API AddrDestroy(
ADDR_HANDLE hLib);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Surface functions
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1498,7 +1510,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Element/utility functions
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1821,7 +1832,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut);
/**
****************************************************************************************************
* ELEM_GETEXPORTNORM_INPUT
@ -2329,7 +2339,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
ADDR_COMPUTE_DCCINFO_OUTPUT* pOut);
/**
****************************************************************************************************
* ADDR_GET_MAX_ALIGNMENTS_OUTPUT
@ -2395,7 +2404,7 @@ ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments(
////////////////////////////////////////////////////////////////////////////////////////////////////
// Surface functions for Gfx9
// Surface functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
@ -2449,7 +2458,7 @@ typedef struct _ADDR2_COMPUTE_SURFACE_INFO_INPUT
UINT_32 size; ///< Size of this structure in bytes
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9
AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Addr2
AddrResourceType resourceType; ///< Surface type
AddrFormat format; ///< Surface format
UINT_32 bpp; ///< bits per pixel
@ -2578,7 +2587,7 @@ typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
UINT_32 sample; ///< Sample index, use fragment index for EQAA
UINT_32 mipId; ///< the mip ID in mip chain
AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9
AddrSwizzleMode swizzleMode; ///< Swizzle mode for Addr2
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
AddrResourceType resourceType; ///< Surface type
UINT_32 bpp; ///< Bits per pixel
@ -2644,7 +2653,7 @@ typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT
UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8,
/// e.g. FMT_1;
AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9
AddrSwizzleMode swizzleMode; ///< Swizzle mode for Addr2
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
AddrResourceType resourceType; ///< Surface type
UINT_32 bpp; ///< Bits per pixel
@ -2696,7 +2705,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr(
////////////////////////////////////////////////////////////////////////////////////////////////////
// HTile functions for Gfx9
// HTile functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
@ -2944,7 +2953,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr(
////////////////////////////////////////////////////////////////////////////////////////////////////
// C-mask functions for Gfx9
// C-mask functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
@ -3169,7 +3178,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr(
////////////////////////////////////////////////////////////////////////////////////////////////////
// F-mask functions for Gfx9
// F-mask functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
@ -3374,7 +3383,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr(
////////////////////////////////////////////////////////////////////////////////////////////////////
// DCC key functions for Gfx9
// DCC key functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
@ -3552,7 +3561,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord(
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Misc functions for Gfx9
// Misc functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
@ -3709,7 +3718,7 @@ typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT
{
UINT_32 size; ///< Size of this structure in bytes
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9
AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Addr2
AddrResourceType resourceType; ///< Surface type
AddrFormat format; ///< Surface format
UINT_32 width; ///< Width of mip0 in texels (not in compressed block)

View file

@ -79,7 +79,6 @@ ADDR_E_RETURNCODE ADDR_API AddrDestroy(
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Surface functions
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -528,7 +527,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
}
///////////////////////////////////////////////////////////////////////////////
// Below functions are element related or helper functions
///////////////////////////////////////////////////////////////////////////////

View file

@ -21,15 +21,14 @@
#define FAMILY_CI 0x78 //# 120 / Sea Islands: Bonaire, Hawaii
#define FAMILY_KV 0x7D //# 125 / Kaveri APUs: Spectre, Spooky, Kalindi, Godavari
#define FAMILY_VI 0x82 //# 130 / Volcanic Islands: Iceland, Tonga, Fiji
#define FAMILY_POLARIS 0x82 //# 130 / Polaris: 10, 11, 12
#define FAMILY_CZ 0x87 //# 135 / Carrizo APUs: Carrizo, Stoney
#define FAMILY_AI 0x8D //# 141 / Vega: 10, 20
#define FAMILY_RV 0x8E //# 142 / Raven
#define FAMILY_NV 0x8F //# 143 / Navi: 10
#define FAMILY_VGH 0x90 //# 144 / Van Gogh
#define FAMILY_NV3 0x91 //# 145 / Navi: 3x
#define FAMILY_GFX1103 0x94
#define FAMILY_GFX1150 0x96
#define FAMILY_GFX1103 0x94
#define FAMILY_RMB 0x92 //# 146 / Rembrandt
#define FAMILY_RPL 0x95 //# 149 / Raphael
#define FAMILY_MDN 0x97 //# 151 / Mendocino
@ -46,8 +45,8 @@
#define FAMILY_IS_AI(f) FAMILY_IS(f, AI)
#define FAMILY_IS_RV(f) FAMILY_IS(f, RV)
#define FAMILY_IS_NV(f) FAMILY_IS(f, NV)
#define FAMILY_IS_NV3(f) FAMILY_IS(f, NV3)
#define FAMILY_IS_RMB(f) FAMILY_IS(f, RMB)
#define FAMILY_IS_NV3(f) FAMILY_IS(f, NV3)
#define AMDGPU_UNKNOWN 0xFF
@ -99,16 +98,13 @@
#define AMDGPU_NAVI31_RANGE 0x01, 0x10 //# 01 <= x < 16
#define AMDGPU_NAVI32_RANGE 0x20, 0xFF //# 32 <= x < 255
#define AMDGPU_NAVI33_RANGE 0x10, 0x20 //# 16 <= x < 32
#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x80 //# 1 <= x < 128
#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xFF //# 128 <= x < max
#define AMDGPU_GFX1150_RANGE 0x01, 0x40 //# 1 <= x < 64
#define AMDGPU_GFX1151_RANGE 0xC0, 0xFF //# 192 <= x < max
#define AMDGPU_GFX1151_RANGE 0xC0, 0xFF //# 192 <= x < 255
#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x80 //# 1 <= x < 128
#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xC0 //# 128 <= x < 192
#define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF //# 01 <= x < 255
#define AMDGPU_RAPHAEL_RANGE 0x01, 0xFF //# 1 <= x < max
#define AMDGPU_MENDOCINO_RANGE 0x01, 0xFF //# 1 <= x < max
#define AMDGPU_EXPAND_FIX(x) x
@ -174,15 +170,13 @@
#define ASICREV_IS_NAVI31_P(r) ASICREV_IS(r, NAVI31)
#define ASICREV_IS_NAVI32_P(r) ASICREV_IS(r, NAVI32)
#define ASICREV_IS_NAVI33_P(r) ASICREV_IS(r, NAVI33)
#define ASICREV_IS_GFX1103_R1(r) ASICREV_IS(r, GFX1103_R1)
#define ASICREV_IS_GFX1103_R2(r) ASICREV_IS(r, GFX1103_R2)
#define ASICREV_IS_GFX1150(r) ASICREV_IS(r, GFX1150)
#define ASICREV_IS_GFX1151(r) ASICREV_IS(r, GFX1151)
#define ASICREV_IS_GFX1103_R1(r) ASICREV_IS(r, GFX1103_R1)
#define ASICREV_IS_GFX1103_R2(r) ASICREV_IS(r, GFX1103_R2)
#define ASICREV_IS_REMBRANDT(r) ASICREV_IS(r, REMBRANDT)
#define ASICREV_IS_RAPHAEL(r) ASICREV_IS(r, RAPHAEL)
#define ASICREV_IS_MENDOCINO(r) ASICREV_IS(r, MENDOCINO)
#endif // _AMDGPU_ASIC_ADDR_H

View file

@ -76,9 +76,52 @@
#endif
// GB_ADDR_CONFIG_N: bitfield view of a 32-bit word. The declared widths add up to exactly
// 32 bits (3+3+2+3+1+3+1+3+2+3+2+2+2+1+1); the two unnamed 1-bit members are padding between
// named fields. Two variants are provided and selected by the endianness macros: the
// big-endian variant lists the same members in exactly the reverse order of the little-endian
// one -- presumably so each field lands on the same bits on both kinds of CPU (bit-field
// allocation order is implementation-defined; confirm against the target compilers).
// NOTE(review): if neither LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined, the type is not
// declared at all.
#if defined(LITTLEENDIAN_CPU)
typedef struct _GB_ADDR_CONFIG_N {
unsigned int num_pipes : 3;
unsigned int pipe_interleave_size : 3;
unsigned int max_compressed_frags : 2;
unsigned int bank_interleave_size : 3;
unsigned int : 1;
unsigned int num_banks : 3;
unsigned int : 1;
unsigned int shader_engine_tile_size : 3;
unsigned int num_shader_engines : 2;
unsigned int num_gpus : 3;
unsigned int multi_gpu_tile_size : 2;
unsigned int num_rb_per_se : 2;
unsigned int row_size : 2;
unsigned int num_lower_pipes : 1;
unsigned int se_enable : 1;
} GB_ADDR_CONFIG_N;
#elif defined(BIGENDIAN_CPU)
// Same members as the little-endian variant, declared last-to-first.
typedef struct _GB_ADDR_CONFIG_N {
unsigned int se_enable : 1;
unsigned int num_lower_pipes : 1;
unsigned int row_size : 2;
unsigned int num_rb_per_se : 2;
unsigned int multi_gpu_tile_size : 2;
unsigned int num_gpus : 3;
unsigned int num_shader_engines : 2;
unsigned int shader_engine_tile_size : 3;
unsigned int : 1;
unsigned int num_banks : 3;
unsigned int : 1;
unsigned int bank_interleave_size : 3;
unsigned int max_compressed_frags : 2;
unsigned int pipe_interleave_size : 3;
unsigned int num_pipes : 3;
} GB_ADDR_CONFIG_N;
#endif
// GB_ADDR_CONFIG: union that overlays a raw 32-bit value with two bitfield decodings of the
// same storage.
//   val - the whole word as a single 32-bit field
//   f   - decoding via GB_ADDR_CONFIG_T (declared outside this view)
//   n   - decoding via GB_ADDR_CONFIG_N
typedef union {
unsigned int val : 32;
GB_ADDR_CONFIG_T f;
GB_ADDR_CONFIG_N n;
} GB_ADDR_CONFIG;
#if defined(LITTLEENDIAN_CPU)

View file

@ -659,24 +659,6 @@ static inline VOID SafeAssign(
}
}
/**
****************************************************************************************************
*   SafeAssign
*
*   @brief
*       Stores an AddrTileMode through a pointer; the store is skipped when the pointer is null
****************************************************************************************************
*/
static inline VOID SafeAssign(
    AddrTileMode* pLVal,    ///< [out] Destination; may be null, in which case nothing is written
    AddrTileMode  rVal)     ///< [in] Value to store
{
    // Guard clause: a null destination means the caller does not want this value.
    if (!pLVal)
    {
        return;
    }

    *pLVal = rVal;
}
/**
****************************************************************************************************
* RoundHalf

View file

@ -190,10 +190,10 @@ ADDR_E_RETURNCODE Lib::Create(
case FAMILY_SI:
pLib = SiHwlInit(&client);
break;
case FAMILY_VI:
case FAMILY_CZ: // VI based fusion
case FAMILY_CI:
case FAMILY_KV: // CI based fusion
case FAMILY_VI:
case FAMILY_CZ: // VI based fusion
pLib = CiHwlInit(&client);
break;
default:
@ -216,8 +216,8 @@ ADDR_E_RETURNCODE Lib::Create(
pLib = Gfx10HwlInit(&client);
break;
case FAMILY_NV3:
case FAMILY_GFX1103:
case FAMILY_GFX1150:
case FAMILY_GFX1103:
pLib = Gfx11HwlInit(&client);
break;
default:
@ -231,6 +231,10 @@ ADDR_E_RETURNCODE Lib::Create(
}
}
if(pLib == NULL)
{
returnCode = ADDR_OUTOFMEMORY;
}
if (pLib != NULL)
{
BOOL_32 initValid;
@ -269,6 +273,7 @@ ADDR_E_RETURNCODE Lib::Create(
{
delete pLib;
pLib = NULL;
returnCode = ADDR_OUTOFMEMORY;
ADDR_ASSERT_ALWAYS();
}
else
@ -288,12 +293,6 @@ ADDR_E_RETURNCODE Lib::Create(
pLib->SetMaxAlignments();
}
else if ((pLib == NULL) &&
(returnCode == ADDR_OK))
{
// Unknown failures, we return the general error code
returnCode = ADDR_ERROR;
}
return returnCode;
}

View file

@ -115,13 +115,15 @@ Lib* Lib::GetLib(
ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE
{
Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib);
if ((pAddrLib != NULL) &&
(pAddrLib->GetChipFamily() <= ADDR_CHIP_FAMILY_VI))
{
// only valid and GFX9+ ASIC can use AddrLib2 function.
// Only GFX9+ ASICs can use AddrLib2 functions.
ADDR_ASSERT_ALWAYS();
hLib = NULL;
}
return static_cast<Lib*>(hLib);
}
@ -1170,6 +1172,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear(
ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
localIn.bpp = pIn->bpp;
localIn.swizzleMode = pIn->swizzleMode;
localIn.flags = pIn->flags;
localIn.width = Max(pIn->unalignedWidth, 1u);
localIn.height = Max(pIn->unalignedHeight, 1u);
@ -1259,6 +1262,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrLinear(
ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
localIn.bpp = pIn->bpp;
localIn.swizzleMode = pIn->swizzleMode;
localIn.flags = pIn->flags;
localIn.width = Max(pIn->unalignedWidth, 1u);
localIn.height = Max(pIn->unalignedHeight, 1u);

View file

@ -1058,7 +1058,6 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily(
ADDR_ASSERT(!"Unknown chip revision");
}
break;
case FAMILY_RMB:
if (ASICREV_IS_REMBRANDT(chipRevision))
{
@ -1467,7 +1466,6 @@ VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
if (IsXor(swMode) == FALSE)
{
// Use simplified logic when we only have one bit-component
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
{
ADDR_ASSERT(IsPow2(pSwizzle[i].value));
@ -1497,87 +1495,475 @@ VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].z);
}
pEquation->xor1[i].value = 0;
pEquation->xor2[i].value = 0;
}
}
else
else if (IsThin(rsrcType, swMode))
{
Dim3d dim;
ComputeBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, rsrcType, swMode);
ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
const UINT_32 blkXLog2 = Log2(dim.w);
const UINT_32 blkYLog2 = Log2(dim.h);
const UINT_32 blkZLog2 = Log2(dim.d);
const UINT_32 blkXMask = dim.w - 1;
const UINT_32 blkYMask = dim.h - 1;
const UINT_32 blkZMask = dim.d - 1;
ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
memcpy(&swizzle, pSwizzle, sizeof(swizzle));
UINT_32 xMask = 0;
UINT_32 yMask = 0;
UINT_32 zMask = 0;
UINT_32 bMask = (1 << elemLog2) - 1;
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
{
for (UINT_32 bitComp = 0; bitComp < ADDR_MAX_EQUATION_COMP; bitComp++)
if (IsPow2(pSwizzle[i].value))
{
if (swizzle[i].value == 0)
if (pSwizzle[i].x != 0)
{
ADDR_ASSERT(bitComp != 0); // Bits above element size must have at least one addr-bit
ADDR_ASSERT(bitComp <= pPatInfo->maxItemCount);
break;
}
ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
xMask |= pSwizzle[i].x;
if (swizzle[i].x != 0)
{
const UINT_32 xLog2 = BitScanForward(swizzle[i].x);
swizzle[i].x = UnsetLeastBit(swizzle[i].x);
xMask |= (1 << xLog2);
const UINT_32 xLog2 = Log2(pSwizzle[i].x);
pEquation->comps[bitComp][i].channel = 0;
pEquation->comps[bitComp][i].valid = 1;
pEquation->comps[bitComp][i].index = xLog2 + elemLog2;
}
else if (swizzle[i].y != 0)
{
const UINT_32 yLog2 = BitScanForward(swizzle[i].y);
swizzle[i].y = UnsetLeastBit(swizzle[i].y);
yMask |= (1 << yLog2);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->comps[bitComp][i].channel = 1;
pEquation->comps[bitComp][i].valid = 1;
pEquation->comps[bitComp][i].index = yLog2;
}
else if (swizzle[i].z != 0)
{
const UINT_32 zLog2 = BitScanForward(swizzle[i].z);
swizzle[i].z = UnsetLeastBit(swizzle[i].z);
zMask |= (1 << zLog2);
pEquation->comps[bitComp][i].channel = 2;
pEquation->comps[bitComp][i].valid = 1;
pEquation->comps[bitComp][i].index = zLog2;
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else
{
// This function doesn't handle MSAA (must update block dims, here, and consumers)
ADDR_ASSERT_ALWAYS();
ADDR_ASSERT(pSwizzle[i].y != 0);
ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
yMask |= pSwizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
if (pSwizzle[i].z != 0)
{
ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
pEquation->xor2[i].channel = 2;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(pSwizzle[i].z);
}
swizzle[i].x = pSwizzle[i].x;
swizzle[i].y = pSwizzle[i].y;
swizzle[i].z = swizzle[i].s = 0;
ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
const UINT_32 xHi = swizzle[i].x & (~blkXMask);
if (xHi != 0)
{
ADDR_ASSERT(IsPow2(xHi));
ADDR_ASSERT(pEquation->xor1[i].value == 0);
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(xHi) + elemLog2;
swizzle[i].x &= blkXMask;
}
const UINT_32 yHi = swizzle[i].y & (~blkYMask);
if (yHi != 0)
{
ADDR_ASSERT(IsPow2(yHi));
if (xHi == 0)
{
ADDR_ASSERT(pEquation->xor1[i].value == 0);
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(yHi);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(yHi);
}
swizzle[i].y &= blkYMask;
}
if (swizzle[i].value == 0)
{
bMask |= 1 << i;
}
}
ADDR_ASSERT(swizzle[i].value == 0); // We missed an xor? Are there too many?
}
// We missed an address bit for coords inside the block?
// That means two coords will land on the same addr, which is bad.
ADDR_ASSERT(((xMask & blkXMask) == blkXMask) &&
((yMask & blkYMask) == blkYMask) &&
((zMask & blkZMask) == blkZMask));
// We're sourcing from outside our block? That won't fly for PRTs, which need to be movable.
// Non-xor modes can also be used for 2D PRTs but they're handled in the simplified logic above.
ADDR_ASSERT((IsPrt(swMode) == false) ||
((xMask == blkXMask) &&
(yMask == blkYMask) &&
(zMask == blkZMask)));
const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
while (bMask != blockMask)
{
for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
{
if ((bMask & (1 << i)) == 0)
{
if (IsPow2(swizzle[i].value))
{
if (swizzle[i].x != 0)
{
ADDR_ASSERT((xMask & swizzle[i].x) == 0);
xMask |= swizzle[i].x;
const UINT_32 xLog2 = Log2(swizzle[i].x);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else
{
ADDR_ASSERT(swizzle[i].y != 0);
ADDR_ASSERT((yMask & swizzle[i].y) == 0);
yMask |= swizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(swizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
const UINT_32 x = swizzle[i].x & xMask;
const UINT_32 y = swizzle[i].y & yMask;
if (x != 0)
{
ADDR_ASSERT(IsPow2(x));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(x) + elemLog2;
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 0;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(x) + elemLog2;
}
}
if (y != 0)
{
ADDR_ASSERT(IsPow2(y));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(y);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(y);
}
}
swizzle[i].x &= ~x;
swizzle[i].y &= ~y;
}
}
}
}
ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
}
else
{
const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
const UINT_32 blkXMask = (1 << blkXLog2) - 1;
const UINT_32 blkYMask = (1 << blkYLog2) - 1;
const UINT_32 blkZMask = (1 << blkZLog2) - 1;
ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
UINT_32 xMask = 0;
UINT_32 yMask = 0;
UINT_32 zMask = 0;
UINT_32 bMask = (1 << elemLog2) - 1;
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
{
if (IsPow2(pSwizzle[i].value))
{
if (pSwizzle[i].x != 0)
{
ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
xMask |= pSwizzle[i].x;
const UINT_32 xLog2 = Log2(pSwizzle[i].x);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else if (pSwizzle[i].y != 0)
{
ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
yMask |= pSwizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
else
{
ADDR_ASSERT(pSwizzle[i].z != 0);
ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
zMask |= pSwizzle[i].z;
pEquation->addr[i].channel = 2;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].z);
ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
swizzle[i].x = pSwizzle[i].x;
swizzle[i].y = pSwizzle[i].y;
swizzle[i].z = pSwizzle[i].z;
swizzle[i].s = 0;
ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
const UINT_32 xHi = swizzle[i].x & (~blkXMask);
const UINT_32 yHi = swizzle[i].y & (~blkYMask);
const UINT_32 zHi = swizzle[i].z & (~blkZMask);
ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
if (xHi != 0)
{
ADDR_ASSERT(IsPow2(xHi));
ADDR_ASSERT(pEquation->xor1[i].value == 0);
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(xHi) + elemLog2;
swizzle[i].x &= blkXMask;
}
if (yHi != 0)
{
ADDR_ASSERT(IsPow2(yHi));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(yHi);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(yHi);
}
swizzle[i].y &= blkYMask;
}
if (zHi != 0)
{
ADDR_ASSERT(IsPow2(zHi));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 2;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(zHi);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 2;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(zHi);
}
swizzle[i].z &= blkZMask;
}
if (swizzle[i].value == 0)
{
bMask |= 1 << i;
}
}
}
const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
while (bMask != blockMask)
{
for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
{
if ((bMask & (1 << i)) == 0)
{
if (IsPow2(swizzle[i].value))
{
if (swizzle[i].x != 0)
{
ADDR_ASSERT((xMask & swizzle[i].x) == 0);
xMask |= swizzle[i].x;
const UINT_32 xLog2 = Log2(swizzle[i].x);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else if (swizzle[i].y != 0)
{
ADDR_ASSERT((yMask & swizzle[i].y) == 0);
yMask |= swizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(swizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
else
{
ADDR_ASSERT(swizzle[i].z != 0);
ADDR_ASSERT((zMask & swizzle[i].z) == 0);
zMask |= swizzle[i].z;
pEquation->addr[i].channel = 2;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(swizzle[i].z);
ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
const UINT_32 x = swizzle[i].x & xMask;
const UINT_32 y = swizzle[i].y & yMask;
const UINT_32 z = swizzle[i].z & zMask;
if (x != 0)
{
ADDR_ASSERT(IsPow2(x));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(x) + elemLog2;
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 0;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(x) + elemLog2;
}
}
if (y != 0)
{
ADDR_ASSERT(IsPow2(y));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(y);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(y);
}
}
if (z != 0)
{
ADDR_ASSERT(IsPow2(z));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 2;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(z);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 2;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(z);
}
}
swizzle[i].x &= ~x;
swizzle[i].y &= ~y;
swizzle[i].z &= ~z;
}
}
}
}
ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
}
}
@ -1620,17 +2006,28 @@ VOID Gfx10Lib::InitEquationTable()
if (pPatInfo != NULL)
{
ADDR_ASSERT(IsValidSwMode(swMode));
ADDR_EQUATION equation = {};
if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
{
ADDR_EQUATION equation = {};
// Passing in pPatInfo to get the addr equation
ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
// Passing in pPatInfo to get the addr equation
ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
equationIndex = m_numEquations;
ADDR_ASSERT(equationIndex < EquationTableSize);
// Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
m_equationTable[equationIndex] = equation;
// Increment m_numEquations
m_numEquations++;
equationIndex = m_numEquations;
ADDR_ASSERT(equationIndex < EquationTableSize);
// Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
m_equationTable[equationIndex] = equation;
// Increment m_numEquations
m_numEquations++;
}
else // There is no equationIndex
{
// We only expect an "ill" equation (maxItemCount > 3) for 64/128 BPE + 3D resource + SW_64KB_D_X when RB+ is supported
ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
ADDR_ASSERT(rsrcTypeIdx == 1);
ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
ADDR_ASSERT(m_settings.supportRbPlus == 1);
}
}
// equationIndex, which is used to look up equations in m_equationTable, will be cached for every
// iteration in this nested for-loop
@ -2254,7 +2651,7 @@ BOOL_32 Gfx10Lib::ValidateSwModeParams(
{
if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
(prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
(thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
(thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0)))
{
ADDR_ASSERT_ALWAYS();
valid = FALSE;
@ -2571,7 +2968,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
if (pIn->flags.view3dAs2dArray)
{
allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
// SW_LINEAR can be used for 3D thin images, including BCn image format.
allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
}
break;

View file

@ -146,6 +146,8 @@ const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask |
Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10Rsrc3dViewAs2dSwModeMask = Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask;
const UINT_32 Gfx10Rsrc3dThickSwModeMask = Gfx10Rsrc3dSwModeMask & ~(Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask);
const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk4KBSwModeMask;

View file

@ -740,7 +740,10 @@ ChipFamily Gfx11Lib::HwlConvertChipFamily(
}
break;
case FAMILY_GFX1150:
if (ASICREV_IS_GFX1150(chipRevision) || ASICREV_IS_GFX1151(chipRevision))
if (false
|| ASICREV_IS_GFX1150(chipRevision)
|| ASICREV_IS_GFX1151(chipRevision)
)
{
m_settings.isGfx1150 = 1;
}
@ -1021,15 +1024,6 @@ UINT_32 Gfx11Lib::GetMetaBlkSize(
// For htile surfaces, pad meta block size to 2K * num_pipes
metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
}
const INT_32 compFragLog2 = numSamplesLog2;
if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
{
const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
metablkSizeLog2 = Max(metablkSizeLog2, tmp);
}
}
const INT_32 metablkBitsLog2 =
@ -1110,7 +1104,6 @@ VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
if (IsXor(swMode) == FALSE)
{
// Use simplified logic when we only have one bit-component
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
{
ADDR_ASSERT(IsPow2(pSwizzle[i].value));
@ -1140,87 +1133,479 @@ VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].z);
}
pEquation->xor1[i].value = 0;
pEquation->xor2[i].value = 0;
}
}
else
else if (IsThin(rsrcType, swMode))
{
Dim3d dim;
ComputeBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, rsrcType, swMode);
ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
const UINT_32 blkXLog2 = Log2(dim.w);
const UINT_32 blkYLog2 = Log2(dim.h);
const UINT_32 blkZLog2 = Log2(dim.d);
const UINT_32 blkXMask = dim.w - 1;
const UINT_32 blkYMask = dim.h - 1;
const UINT_32 blkZMask = dim.d - 1;
ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
memcpy(&swizzle, pSwizzle, sizeof(swizzle));
ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
UINT_32 xMask = 0;
UINT_32 yMask = 0;
UINT_32 zMask = 0;
UINT_32 bMask = (1 << elemLog2) - 1;
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
{
for (UINT_32 bitComp = 0; bitComp < ADDR_MAX_EQUATION_COMP; bitComp++)
if (IsPow2(pSwizzle[i].value))
{
if (swizzle[i].value == 0)
if (pSwizzle[i].x != 0)
{
ADDR_ASSERT(bitComp != 0); // Bits above element size must have at least one addr-bit
ADDR_ASSERT(bitComp <= pPatInfo->maxItemCount);
break;
}
ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
xMask |= pSwizzle[i].x;
if (swizzle[i].x != 0)
{
const UINT_32 xLog2 = BitScanForward(swizzle[i].x);
swizzle[i].x = UnsetLeastBit(swizzle[i].x);
xMask |= (1 << xLog2);
const UINT_32 xLog2 = Log2(pSwizzle[i].x);
pEquation->comps[bitComp][i].channel = 0;
pEquation->comps[bitComp][i].valid = 1;
pEquation->comps[bitComp][i].index = xLog2 + elemLog2;
}
else if (swizzle[i].y != 0)
{
const UINT_32 yLog2 = BitScanForward(swizzle[i].y);
swizzle[i].y = UnsetLeastBit(swizzle[i].y);
yMask |= (1 << yLog2);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->comps[bitComp][i].channel = 1;
pEquation->comps[bitComp][i].valid = 1;
pEquation->comps[bitComp][i].index = yLog2;
}
else if (swizzle[i].z != 0)
{
const UINT_32 zLog2 = BitScanForward(swizzle[i].z);
swizzle[i].z = UnsetLeastBit(swizzle[i].z);
zMask |= (1 << zLog2);
pEquation->comps[bitComp][i].channel = 2;
pEquation->comps[bitComp][i].valid = 1;
pEquation->comps[bitComp][i].index = zLog2;
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else
{
// This function doesn't handle MSAA (must update block dims, here, and consumers)
ADDR_ASSERT_ALWAYS();
ADDR_ASSERT(pSwizzle[i].y != 0);
ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
yMask |= pSwizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
if (pSwizzle[i].z != 0)
{
ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
pEquation->xor2[i].channel = 2;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(pSwizzle[i].z);
}
swizzle[i].x = pSwizzle[i].x;
swizzle[i].y = pSwizzle[i].y;
swizzle[i].z = swizzle[i].s = 0;
ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
const UINT_32 xHi = swizzle[i].x & (~blkXMask);
if (xHi != 0)
{
ADDR_ASSERT(IsPow2(xHi));
ADDR_ASSERT(pEquation->xor1[i].value == 0);
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(xHi) + elemLog2;
swizzle[i].x &= blkXMask;
}
const UINT_32 yHi = swizzle[i].y & (~blkYMask);
if (yHi != 0)
{
ADDR_ASSERT(IsPow2(yHi));
if (xHi == 0)
{
ADDR_ASSERT(pEquation->xor1[i].value == 0);
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(yHi);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(yHi);
}
swizzle[i].y &= blkYMask;
}
if (swizzle[i].value == 0)
{
bMask |= 1 << i;
}
}
ADDR_ASSERT(swizzle[i].value == 0); // We missed an xor? Are there too many?
}
// We missed an address bit for coords inside the block?
// That means two coords will land on the same addr, which is bad.
ADDR_ASSERT(((xMask & blkXMask) == blkXMask) &&
((yMask & blkYMask) == blkYMask) &&
((zMask & blkZMask) == blkZMask));
// We're sourcing from outside our block? That won't fly for PRTs, which need to be movable.
// Non-xor modes can also be used for 2D PRTs but they're handled in the simplified logic above.
ADDR_ASSERT((IsPrt(swMode) == false) ||
((xMask == blkXMask) &&
(yMask == blkYMask) &&
(zMask == blkZMask)));
const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
while (bMask != blockMask)
{
for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
{
if ((bMask & (1 << i)) == 0)
{
if (IsPow2(swizzle[i].value))
{
if (swizzle[i].x != 0)
{
ADDR_ASSERT((xMask & swizzle[i].x) == 0);
xMask |= swizzle[i].x;
const UINT_32 xLog2 = Log2(swizzle[i].x);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else
{
ADDR_ASSERT(swizzle[i].y != 0);
ADDR_ASSERT((yMask & swizzle[i].y) == 0);
yMask |= swizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(swizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
const UINT_32 x = swizzle[i].x & xMask;
const UINT_32 y = swizzle[i].y & yMask;
if (x != 0)
{
ADDR_ASSERT(IsPow2(x));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(x) + elemLog2;
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 0;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(x) + elemLog2;
}
}
if (y != 0)
{
ADDR_ASSERT(IsPow2(y));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(y);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(y);
}
}
swizzle[i].x &= ~x;
swizzle[i].y &= ~y;
}
}
}
}
ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
}
else
{
const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ?
Block256K_Log2_3d[elemLog2] :
((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]);
const UINT_32 blkXLog2 = blkDim.w;
const UINT_32 blkYLog2 = blkDim.h;
const UINT_32 blkZLog2 = blkDim.d;
const UINT_32 blkXMask = (1 << blkXLog2) - 1;
const UINT_32 blkYMask = (1 << blkYLog2) - 1;
const UINT_32 blkZMask = (1 << blkZLog2) - 1;
ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
UINT_32 xMask = 0;
UINT_32 yMask = 0;
UINT_32 zMask = 0;
UINT_32 bMask = (1 << elemLog2) - 1;
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
{
if (IsPow2(pSwizzle[i].value))
{
if (pSwizzle[i].x != 0)
{
ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
xMask |= pSwizzle[i].x;
const UINT_32 xLog2 = Log2(pSwizzle[i].x);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else if (pSwizzle[i].y != 0)
{
ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
yMask |= pSwizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
else
{
ADDR_ASSERT(pSwizzle[i].z != 0);
ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
zMask |= pSwizzle[i].z;
pEquation->addr[i].channel = 2;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(pSwizzle[i].z);
ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
swizzle[i].x = pSwizzle[i].x;
swizzle[i].y = pSwizzle[i].y;
swizzle[i].z = pSwizzle[i].z;
swizzle[i].s = 0;
ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
const UINT_32 xHi = swizzle[i].x & (~blkXMask);
const UINT_32 yHi = swizzle[i].y & (~blkYMask);
const UINT_32 zHi = swizzle[i].z & (~blkZMask);
ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
if (xHi != 0)
{
ADDR_ASSERT(IsPow2(xHi));
ADDR_ASSERT(pEquation->xor1[i].value == 0);
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(xHi) + elemLog2;
swizzle[i].x &= blkXMask;
}
if (yHi != 0)
{
ADDR_ASSERT(IsPow2(yHi));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(yHi);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(yHi);
}
swizzle[i].y &= blkYMask;
}
if (zHi != 0)
{
ADDR_ASSERT(IsPow2(zHi));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 2;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(zHi);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 2;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(zHi);
}
swizzle[i].z &= blkZMask;
}
if (swizzle[i].value == 0)
{
bMask |= 1 << i;
}
}
}
const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
while (bMask != blockMask)
{
for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
{
if ((bMask & (1 << i)) == 0)
{
if (IsPow2(swizzle[i].value))
{
if (swizzle[i].x != 0)
{
ADDR_ASSERT((xMask & swizzle[i].x) == 0);
xMask |= swizzle[i].x;
const UINT_32 xLog2 = Log2(swizzle[i].x);
ADDR_ASSERT(xLog2 < blkXLog2);
pEquation->addr[i].channel = 0;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = xLog2 + elemLog2;
}
else if (swizzle[i].y != 0)
{
ADDR_ASSERT((yMask & swizzle[i].y) == 0);
yMask |= swizzle[i].y;
pEquation->addr[i].channel = 1;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(swizzle[i].y);
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
}
else
{
ADDR_ASSERT(swizzle[i].z != 0);
ADDR_ASSERT((zMask & swizzle[i].z) == 0);
zMask |= swizzle[i].z;
pEquation->addr[i].channel = 2;
pEquation->addr[i].valid = 1;
pEquation->addr[i].index = Log2(swizzle[i].z);
ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
}
swizzle[i].value = 0;
bMask |= 1 << i;
}
else
{
const UINT_32 x = swizzle[i].x & xMask;
const UINT_32 y = swizzle[i].y & yMask;
const UINT_32 z = swizzle[i].z & zMask;
if (x != 0)
{
ADDR_ASSERT(IsPow2(x));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 0;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(x) + elemLog2;
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 0;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(x) + elemLog2;
}
}
if (y != 0)
{
ADDR_ASSERT(IsPow2(y));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 1;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(y);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 1;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(y);
}
}
if (z != 0)
{
ADDR_ASSERT(IsPow2(z));
if (pEquation->xor1[i].value == 0)
{
pEquation->xor1[i].channel = 2;
pEquation->xor1[i].valid = 1;
pEquation->xor1[i].index = Log2(z);
}
else
{
ADDR_ASSERT(pEquation->xor2[i].value == 0);
pEquation->xor2[i].channel = 2;
pEquation->xor2[i].valid = 1;
pEquation->xor2[i].index = Log2(z);
}
}
swizzle[i].x &= ~x;
swizzle[i].y &= ~y;
swizzle[i].z &= ~z;
}
}
}
}
ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
}
}
@ -1255,16 +1640,28 @@ VOID Gfx11Lib::InitEquationTable()
if (pPatInfo != NULL)
{
ADDR_ASSERT(IsValidSwMode(swMode));
ADDR_EQUATION equation = {};
ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
{
ADDR_EQUATION equation = {};
equationIndex = m_numEquations;
ADDR_ASSERT(equationIndex < EquationTableSize);
// Passing in pPatInfo to get the addr equation
ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
m_equationTable[equationIndex] = equation;
m_numEquations++;
equationIndex = m_numEquations;
ADDR_ASSERT(equationIndex < EquationTableSize);
// Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
m_equationTable[equationIndex] = equation;
// Increment m_numEquations
m_numEquations++;
}
else // There is no equationIndex
{
// We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X
ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D);
ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
}
}
m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
@ -1874,7 +2271,7 @@ BOOL_32 Gfx11Lib::ValidateSwModeParams(
{
if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) ||
(prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) ||
(thin3d && ((swizzleMask & Gfx11Rsrc3dThinSwModeMask) == 0)))
(thin3d && ((swizzleMask & Gfx11Rsrc3dViewAs2dSwModeMask) == 0)))
{
ADDR_ASSERT_ALWAYS();
valid = FALSE;
@ -2110,7 +2507,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
if (pIn->flags.view3dAs2dArray)
{
allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask;
// SW_LINEAR can be used for 3D thin images, including BCn image format.
allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
}
break;
@ -2594,7 +2992,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPossibleSwizzleModes(
if (pIn->flags.view3dAs2dArray)
{
allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask;
// SW_LINEAR can be used for 3D thin images, including BCn image format.
allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
}
break;

View file

@ -142,6 +142,8 @@ const UINT_32 Gfx11Rsrc3dThin256KBSwModeMask = (1u << ADDR_SW_256KB_Z_X) |
// Union of the two thin 3D swizzle-mode masks (Gfx11Rsrc3dThin64KBSwModeMask and Gfx11Rsrc3dThin256KBSwModeMask).
const UINT_32 Gfx11Rsrc3dThinSwModeMask = Gfx11Rsrc3dThin64KBSwModeMask | Gfx11Rsrc3dThin256KBSwModeMask;
// Thin 3D swizzle modes plus whatever Gfx11LinearSwModeMask contains.
// NOTE(review): intended for 3D resources viewed as 2D arrays (view3dAs2dArray), where linear is also acceptable - confirm against callers.
const UINT_32 Gfx11Rsrc3dViewAs2dSwModeMask = Gfx11Rsrc3dThinSwModeMask | Gfx11LinearSwModeMask;
// Every 3D swizzle mode that is in neither the thin mask nor the linear mask.
const UINT_32 Gfx11Rsrc3dThickSwModeMask = Gfx11Rsrc3dSwModeMask & ~(Gfx11Rsrc3dThinSwModeMask | Gfx11LinearSwModeMask);
// Thick 3D swizzle modes that are also present in Gfx11Blk4KBSwModeMask (presumably the 4KB-block modes - defined outside this view).
const UINT_32 Gfx11Rsrc3dThick4KBSwModeMask = Gfx11Rsrc3dThickSwModeMask & Gfx11Blk4KBSwModeMask;

View file

@ -1305,11 +1305,8 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
{
ADDR_ASSERT(m_settings.isVega10 == FALSE);
ADDR_ASSERT(m_settings.isRaven == FALSE);
ADDR_ASSERT(m_settings.isVega20 == FALSE);
if (m_settings.isVega12)
{
m_settings.htileCacheRbConflict = 1;

View file

@ -1645,7 +1645,9 @@ UINT_32 SiLib::HwlGetPitchAlignmentLinear(
}
else
{
pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp));
{
pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp));
}
}
return pitchAlign;
@ -2263,7 +2265,10 @@ BOOL_32 SiLib::DecodeGbRegs(
reg.val = pRegValue->gbAddrConfig;
switch (reg.f.pipe_interleave_size)
UINT_32 pipe_interleave_size = reg.f.pipe_interleave_size;
UINT_32 row_size = reg.f.row_size;
switch (pipe_interleave_size)
{
case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
@ -2277,7 +2282,7 @@ BOOL_32 SiLib::DecodeGbRegs(
break;
}
switch (reg.f.row_size)
switch (row_size)
{
case ADDR_CONFIG_1KB_ROW:
m_rowSize = ADDR_ROWSIZE_1KB;

View file

@ -68,8 +68,11 @@ struct SiChipSettings
UINT_32 isPolaris10 : 1;
UINT_32 isPolaris11 : 1;
UINT_32 isPolaris12 : 1;
// VI fusion
UINT_32 isVegaM : 1;
UINT_32 isCarrizo : 1;
UINT_32 : 2;
};
/**