amd/addrlib: update to the latest version

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
This commit is contained in:
Marek Olšák 2019-09-03 22:38:38 -04:00
parent a3ea4805aa
commit 69ea473eeb
16 changed files with 7263 additions and 8050 deletions

View file

@ -308,7 +308,8 @@ typedef union _ADDR_CREATE_FLAGS
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility
UINT_32 reserved : 24; ///< Reserved bits for future use UINT_32 nonPower2MemConfig : 1; ///< Physical video memory size is not power of 2
UINT_32 reserved : 23; ///< Reserved bits for future use
}; };
UINT_32 value; UINT_32 value;
@ -347,9 +348,6 @@ typedef struct _ADDR_REGISTER_VALUE
///< CI registers------------------------------------------------- ///< CI registers-------------------------------------------------
const UINT_32* pMacroTileConfig; ///< Global macro tile mode table const UINT_32* pMacroTileConfig; ///< Global macro tile mode table
UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig
///< GFX9 HW parameters
UINT_32 blockVarSizeLog2; ///< SW_VAR_* block size
} ADDR_REGISTER_VALUE; } ADDR_REGISTER_VALUE;
/** /**
@ -3549,12 +3547,14 @@ typedef union _ADDR2_BLOCK_SET
{ {
struct struct
{ {
UINT_32 micro : 1; // 256B block for 2D resource UINT_32 micro : 1; // 256B block for 2D resource
UINT_32 macro4KB : 1; // 4KB for 2D/3D resource UINT_32 macroThin4KB : 1; // Thin 4KB for 2D/3D resource
UINT_32 macro64KB : 1; // 64KB for 2D/3D resource UINT_32 macroThick4KB : 1; // Thick 4KB for 3D resource
UINT_32 var : 1; // VAR block UINT_32 macroThin64KB : 1; // Thin 64KB for 2D/3D resource
UINT_32 linear : 1; // Linear block UINT_32 macroThick64KB : 1; // Thick 64KB for 3D resource
UINT_32 reserved : 27; UINT_32 var : 1; // VAR block
UINT_32 linear : 1; // Linear block
UINT_32 reserved : 25;
}; };
UINT_32 value; UINT_32 value;
@ -3594,38 +3594,38 @@ typedef union _ADDR2_SWMODE_SET
{ {
struct struct
{ {
UINT_32 swLinear : 1; UINT_32 swLinear : 1;
UINT_32 sw256B_S : 1; UINT_32 sw256B_S : 1;
UINT_32 sw256B_D : 1; UINT_32 sw256B_D : 1;
UINT_32 sw256B_R : 1; UINT_32 sw256B_R : 1;
UINT_32 sw4KB_Z : 1; UINT_32 sw4KB_Z : 1;
UINT_32 sw4KB_S : 1; UINT_32 sw4KB_S : 1;
UINT_32 sw4KB_D : 1; UINT_32 sw4KB_D : 1;
UINT_32 sw4KB_R : 1; UINT_32 sw4KB_R : 1;
UINT_32 sw64KB_Z : 1; UINT_32 sw64KB_Z : 1;
UINT_32 sw64KB_S : 1; UINT_32 sw64KB_S : 1;
UINT_32 sw64KB_D : 1; UINT_32 sw64KB_D : 1;
UINT_32 sw64KB_R : 1; UINT_32 sw64KB_R : 1;
UINT_32 swVar_Z : 1; UINT_32 swReserved0 : 1;
UINT_32 swVar_S : 1; UINT_32 swReserved1 : 1;
UINT_32 swVar_D : 1; UINT_32 swReserved2 : 1;
UINT_32 swVar_R : 1; UINT_32 swReserved3 : 1;
UINT_32 sw64KB_Z_T : 1; UINT_32 sw64KB_Z_T : 1;
UINT_32 sw64KB_S_T : 1; UINT_32 sw64KB_S_T : 1;
UINT_32 sw64KB_D_T : 1; UINT_32 sw64KB_D_T : 1;
UINT_32 sw64KB_R_T : 1; UINT_32 sw64KB_R_T : 1;
UINT_32 sw4KB_Z_X : 1; UINT_32 sw4KB_Z_X : 1;
UINT_32 sw4KB_S_X : 1; UINT_32 sw4KB_S_X : 1;
UINT_32 sw4KB_D_X : 1; UINT_32 sw4KB_D_X : 1;
UINT_32 sw4KB_R_X : 1; UINT_32 sw4KB_R_X : 1;
UINT_32 sw64KB_Z_X : 1; UINT_32 sw64KB_Z_X : 1;
UINT_32 sw64KB_S_X : 1; UINT_32 sw64KB_S_X : 1;
UINT_32 sw64KB_D_X : 1; UINT_32 sw64KB_D_X : 1;
UINT_32 sw64KB_R_X : 1; UINT_32 sw64KB_R_X : 1;
UINT_32 swVar_Z_X : 1; UINT_32 swVar_Z_X : 1;
UINT_32 swVar_S_X : 1; UINT_32 swReserved4 : 1;
UINT_32 swVar_D_X : 1; UINT_32 swReserved5 : 1;
UINT_32 swVar_R_X : 1; UINT_32 swVar_R_X : 1;
}; };
UINT_32 value; UINT_32 value;

View file

@ -90,11 +90,7 @@ typedef int INT;
#if defined(BRAHMA_ARM) #if defined(BRAHMA_ARM)
#define ADDR_FASTCALL #define ADDR_FASTCALL
#elif defined(__GNUC__) #elif defined(__GNUC__)
#if defined(__i386__) #define ADDR_FASTCALL __attribute__((regparm(0)))
#define ADDR_FASTCALL __attribute__((regparm(0)))
#else
#define ADDR_FASTCALL
#endif
#else #else
#define ADDR_FASTCALL __fastcall #define ADDR_FASTCALL __fastcall
#endif #endif
@ -203,22 +199,32 @@ typedef enum _AddrTileMode
/** /**
**************************************************************************************************** ****************************************************************************************************
* @brief * @brief
* Neutral enums that define swizzle modes for Gfx9 ASIC * Neutral enums that define swizzle modes for Gfx9+ ASIC
* @note * @note
* *
* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resouce * ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resource
* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resouce * ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resource
* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resouce * ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resource
* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resouce * ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resource
* ADDR_SW_VAR_* addressing block aligned size is ASIC specific, for 2D/3D resouce
* *
* ADDR_SW_*_Z For 2D resouce, represents Z-order swizzle mode for depth/stencil/FMask * ADDR_SW_*_Z For GFX9:
For 3D resouce, represents a swizzle mode similar to legacy thick tile mode - for 2D resource, represents Z-order swizzle mode for depth/stencil/FMask
* ADDR_SW_*_S represents standard swizzle mode defined by MS - for 3D resource, represents a swizzle mode similar to legacy thick tile mode
* ADDR_SW_*_D For 2D resouce, represents a swizzle mode for displayable resource For GFX10:
* For 3D resouce, represents a swizzle mode which places each slice in order & pixel - represents Z-order swizzle mode for depth/stencil/FMask
* ADDR_SW_*_S For GFX9+:
- represents standard swizzle mode defined by MS
* ADDR_SW_*_D For GFX9:
- for 2D resource, represents a swizzle mode for displayable resource
* - for 3D resource, represents a swizzle mode which places each slice in order & pixel
For GFX10:
- for 2D resource, represents a swizzle mode for displayable resource
- for 3D resource, represents a swizzle mode similar to legacy thick tile mode
within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible! within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible!
* ADDR_SW_*_R For 2D resouce only, represents a swizzle mode for rotated displayable resource * ADDR_SW_*_R For GFX9:
- 2D resource only, represents a swizzle mode for rotated displayable resource
For GFX10:
- represents a swizzle mode for render target resource
* *
**************************************************************************************************** ****************************************************************************************************
*/ */
@ -236,10 +242,10 @@ typedef enum _AddrSwizzleMode
ADDR_SW_64KB_S = 9, ADDR_SW_64KB_S = 9,
ADDR_SW_64KB_D = 10, ADDR_SW_64KB_D = 10,
ADDR_SW_64KB_R = 11, ADDR_SW_64KB_R = 11,
ADDR_SW_VAR_Z = 12, ADDR_SW_RESERVED0 = 12,
ADDR_SW_VAR_S = 13, ADDR_SW_RESERVED1 = 13,
ADDR_SW_VAR_D = 14, ADDR_SW_RESERVED2 = 14,
ADDR_SW_VAR_R = 15, ADDR_SW_RESERVED3 = 15,
ADDR_SW_64KB_Z_T = 16, ADDR_SW_64KB_Z_T = 16,
ADDR_SW_64KB_S_T = 17, ADDR_SW_64KB_S_T = 17,
ADDR_SW_64KB_D_T = 18, ADDR_SW_64KB_D_T = 18,
@ -253,17 +259,11 @@ typedef enum _AddrSwizzleMode
ADDR_SW_64KB_D_X = 26, ADDR_SW_64KB_D_X = 26,
ADDR_SW_64KB_R_X = 27, ADDR_SW_64KB_R_X = 27,
ADDR_SW_VAR_Z_X = 28, ADDR_SW_VAR_Z_X = 28,
ADDR_SW_VAR_S_X = 29, ADDR_SW_RESERVED4 = 29,
ADDR_SW_VAR_D_X = 30, ADDR_SW_RESERVED5 = 30,
ADDR_SW_VAR_R_X = 31, ADDR_SW_VAR_R_X = 31,
ADDR_SW_LINEAR_GENERAL = 32, ADDR_SW_LINEAR_GENERAL = 32,
ADDR_SW_MAX_TYPE = 33, ADDR_SW_MAX_TYPE = 33,
// Used for represent block with identical size
ADDR_SW_256B = ADDR_SW_256B_S,
ADDR_SW_4KB = ADDR_SW_4KB_S_X,
ADDR_SW_64KB = ADDR_SW_64KB_S_X,
ADDR_SW_VAR = ADDR_SW_VAR_S_X,
} AddrSwizzleMode; } AddrSwizzleMode;
/** /**
@ -316,7 +316,9 @@ typedef enum _AddrSwType
ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder
ADDR_SW_S = 1, // Resource basic swizzle mode is Standard ADDR_SW_S = 1, // Resource basic swizzle mode is Standard
ADDR_SW_D = 2, // Resource basic swizzle mode is Display ADDR_SW_D = 2, // Resource basic swizzle mode is Display
ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated/Render optimized
ADDR_SW_L = 4, // Resource basic swizzle mode is Linear
ADDR_SW_MAX_SWTYPE
} AddrSwType; } AddrSwType;
/** /**

View file

@ -60,7 +60,6 @@ ADDR_E_RETURNCODE ADDR_API AddrCreate(
ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle
{ {
ADDR_E_RETURNCODE returnCode = ADDR_OK; ADDR_E_RETURNCODE returnCode = ADDR_OK;
{ {
returnCode = Lib::Create(pAddrCreateIn, pAddrCreateOut); returnCode = Lib::Create(pAddrCreateIn, pAddrCreateOut);
} }

View file

@ -46,12 +46,16 @@
// ADDR_LNX_KERNEL_BUILD is for internal build // ADDR_LNX_KERNEL_BUILD is for internal build
// Moved from addrinterface.h so __KERNEL__ is not needed any more // Moved from addrinterface.h so __KERNEL__ is not needed any more
#if !defined(__APPLE__) || defined(HAVE_TSERVER) #if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
#include <string.h>
#elif !defined(__APPLE__) || defined(HAVE_TSERVER)
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <assert.h>
#endif #endif
#include <assert.h>
#include "util/macros.h"
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Platform specific debug break defines // Platform specific debug break defines
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
@ -156,11 +160,7 @@
#endif // DEBUG #endif // DEBUG
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(static_assert) #define ADDR_C_ASSERT(__e) STATIC_ASSERT(__e)
#define ADDR_C_ASSERT(__e) static_assert(__e, "")
#else
#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1]
#endif
namespace Addr namespace Addr
{ {
@ -270,7 +270,8 @@ union ConfigFlags
UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear
UINT_32 use32bppFor422Fmt : 1; ///< View 422 formats as 32 bits per pixel element UINT_32 use32bppFor422Fmt : 1; ///< View 422 formats as 32 bits per pixel element
UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility
UINT_32 reserved : 20; ///< Reserved bits for future use UINT_32 nonPower2MemConfig : 1; ///< Physical video memory size is not power of 2
UINT_32 reserved : 19; ///< Reserved bits for future use
}; };
UINT_32 value; UINT_32 value;
@ -926,6 +927,21 @@ static inline UINT_32 GetCoordActiveMask(
return mask; return mask;
} }
/**
****************************************************************************************************
*   ShiftCeil
*
*   @brief
*       Apply right-shift with ceiling: the result is (a >> b), plus one if any of the b low bits
*       shifted out of a were set.
*
*   @note
*       b must be smaller than the bit width of UINT_32; larger shift counts are not handled here.
*
*   @return
*       a right-shifted by b bits, rounded up
****************************************************************************************************
*/
static inline UINT_32 ShiftCeil(
    UINT_32 a,  ///< [in] value to be right-shifted
    UINT_32 b)  ///< [in] number of bits to shift
{
    // Build the mask from an unsigned literal: with a signed "1" the expression (1 << b) - 1
    // overflows a signed int when b == 31.
    const UINT_32 shiftedOutMask = (1u << b) - 1u;

    return (a >> b) + (((a & shiftedOutMask) != 0) ? 1 : 0);
}
} // Addr } // Addr
#endif // __ADDR_COMMON_H__ #endif // __ADDR_COMMON_H__

View file

@ -250,6 +250,7 @@ ADDR_E_RETURNCODE Lib::Create(
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign;
pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
pLib->m_configFlags.forceDccAndTcCompat = pCreateIn->createFlags.forceDccAndTcCompat; pLib->m_configFlags.forceDccAndTcCompat = pCreateIn->createFlags.forceDccAndTcCompat;
pLib->m_configFlags.nonPower2MemConfig = pCreateIn->createFlags.nonPower2MemConfig;
pLib->m_configFlags.disableLinearOpt = FALSE; pLib->m_configFlags.disableLinearOpt = FALSE;
pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);

View file

@ -73,7 +73,8 @@ Lib::Lib()
m_rbPerSeLog2(0), m_rbPerSeLog2(0),
m_maxCompFragLog2(0), m_maxCompFragLog2(0),
m_pipeInterleaveLog2(0), m_pipeInterleaveLog2(0),
m_blockVarSizeLog2(0) m_blockVarSizeLog2(0),
m_numEquations(0)
{ {
} }
@ -98,7 +99,8 @@ Lib::Lib(const Client* pClient)
m_rbPerSeLog2(0), m_rbPerSeLog2(0),
m_maxCompFragLog2(0), m_maxCompFragLog2(0),
m_pipeInterleaveLog2(0), m_pipeInterleaveLog2(0),
m_blockVarSizeLog2(0) m_blockVarSizeLog2(0),
m_numEquations(0)
{ {
} }
@ -1363,35 +1365,65 @@ ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf(
AddrResourceType resourceType, AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const AddrSwizzleMode swizzleMode) const
{ {
ADDR_E_RETURNCODE returnCode = ComputeBlockDimension(pWidth, ADDR_E_RETURNCODE returnCode = ADDR_OK;
pHeight,
pDepth,
bpp,
resourceType,
swizzleMode);
if ((returnCode == ADDR_OK) && (numSamples > 1) && IsThin(resourceType, swizzleMode)) if (IsThick(resourceType, swizzleMode))
{ {
const UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); ComputeThickBlockDimension(pWidth, pHeight, pDepth, bpp, resourceType, swizzleMode);
const UINT_32 log2sample = Log2(numSamples); }
const UINT_32 q = log2sample >> 1; else if (IsThin(resourceType, swizzleMode))
const UINT_32 r = log2sample & 1; {
ComputeThinBlockDimension(pWidth, pHeight, pDepth, bpp, numSamples, resourceType, swizzleMode);
if (log2blkSize & 1) }
{ else
*pWidth >>= q; {
*pHeight >>= (q + r); ADDR_ASSERT_ALWAYS();
} returnCode = ADDR_INVALIDPARAMS;
else
{
*pWidth >>= (q + r);
*pHeight >>= q;
}
} }
return returnCode; return returnCode;
} }
/**
************************************************************************************************************************
*   Lib::ComputeThinBlockDimension
*
*   @brief
*       Internal function that derives the width/height/depth (in elements) of a thin block from the
*       element size, the sample count and the swizzle mode.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ComputeThinBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    UINT_32          numSamples,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThin(resourceType, swizzleMode));

    // GFX9 and GFX10 amplify the block dimension differently. Example: 128KB block + 1xAA + 1BPE gives a thin block
    // of [256W * 512H] on GFX9 ASICs but [512W * 256H] on GFX10 ASICs. The GFX10 (newer HWL) flavor is the one
    // implemented here in the base class, so that future HWLs need fewer changes.
    const UINT_32 blkSizeLog2  = GetBlockSizeLog2(swizzleMode);
    const UINT_32 eleBytesLog2 = Log2(bpp >> 3);
    const UINT_32 samplesLog2  = Log2(Max(numSamples, 1u));   // a sample count of 0 is treated as 1
    const UINT_32 numEleLog2   = blkSizeLog2 - eleBytesLog2 - samplesLog2;

    // "1xAA/4xAA" or "2xAA/8xAA + odd block size log2": width == height or width == 2 * height.
    // Everything else:                                  height == width or height == 2 * width.
    const BOOL_32 evenSamplesLog2 = ((samplesLog2 & 1) == 0);
    const BOOL_32 oddBlkSizeLog2  = ((blkSizeLog2 & 1) != 0);

    // Split the element count between the two axes; when the width takes precedence the halving rounds up.
    UINT_32 widthLog2 = numEleLog2;

    if (evenSamplesLog2 || oddBlkSizeLog2)
    {
        widthLog2 += 1;
    }

    widthLog2 /= 2;

    const UINT_32 heightLog2 = numEleLog2 - widthLog2;

    *pWidth  = 1u << widthLog2;
    *pHeight = 1u << heightLog2;
    *pDepth  = 1;
}
/** /**
************************************************************************************************************************ ************************************************************************************************************************
* Lib::ComputeBlockDimension * Lib::ComputeBlockDimension
@ -1404,42 +1436,22 @@ ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf(
************************************************************************************************************************ ************************************************************************************************************************
*/ */
ADDR_E_RETURNCODE Lib::ComputeBlockDimension( ADDR_E_RETURNCODE Lib::ComputeBlockDimension(
UINT_32* pWidth, UINT_32* pWidth,
UINT_32* pHeight, UINT_32* pHeight,
UINT_32* pDepth, UINT_32* pDepth,
UINT_32 bpp, UINT_32 bpp,
AddrResourceType resourceType, AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const AddrSwizzleMode swizzleMode) const
{ {
ADDR_E_RETURNCODE returnCode = ADDR_OK; ADDR_E_RETURNCODE returnCode = ADDR_OK;
UINT_32 eleBytes = bpp >> 3; if (IsThick(resourceType, swizzleMode))
UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
if (IsThin(resourceType, swizzleMode))
{ {
UINT_32 log2blkSizeIn256B = log2blkSize - 8; ComputeThickBlockDimension(pWidth, pHeight, pDepth, bpp, resourceType, swizzleMode);
UINT_32 widthAmp = log2blkSizeIn256B / 2;
UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
*pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
*pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
*pDepth = 1;
} }
else if (IsThick(resourceType, swizzleMode)) else if (IsThin(resourceType, swizzleMode))
{ {
UINT_32 log2blkSizeIn1KB = log2blkSize - 10; ComputeThinBlockDimension(pWidth, pHeight, pDepth, bpp, 0, resourceType, swizzleMode);
UINT_32 averageAmp = log2blkSizeIn1KB / 3;
UINT_32 restAmp = log2blkSizeIn1KB % 3;
ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block1K_3d) / sizeof(Block1K_3d[0]));
*pWidth = Block1K_3d[microBlockSizeTableIndex].w << averageAmp;
*pHeight = Block1K_3d[microBlockSizeTableIndex].h << (averageAmp + (restAmp / 2));
*pDepth = Block1K_3d[microBlockSizeTableIndex].d << (averageAmp + ((restAmp != 0) ? 1 : 0));
} }
else else
{ {
@ -1450,6 +1462,42 @@ ADDR_E_RETURNCODE Lib::ComputeBlockDimension(
return returnCode; return returnCode;
} }
/**
************************************************************************************************************************
*   Lib::ComputeThickBlockDimension
*
*   @brief
*       Internal function that derives the block width/height/depth (in elements) of a thick swizzle mode by
*       scaling the matching Block1K_3d entry up to the block size of the swizzle mode.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ComputeThickBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThick(resourceType, swizzleMode));

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);

    // The micro block table is indexed by log2 of the element size in bytes.
    const UINT_32 tableIdx = Log2(bpp >> 3);

    ADDR_ASSERT(tableIdx < sizeof(Block1K_3d) / sizeof(Block1K_3d[0]));

    // Number of doublings between the 1KB (2^10) table entry and the actual block size.
    const UINT_32 doublings = blkSizeLog2 - 10;
    const UINT_32 evenShare = doublings / 3;
    const UINT_32 leftOver  = doublings % 3;

    // Every axis gets the even share. The leftover goes to depth first (leftOver >= 1), then to height
    // (leftOver == 2).
    const UINT_32 widthShift  = evenShare;
    const UINT_32 heightShift = evenShare + (leftOver / 2);
    const UINT_32 depthShift  = evenShare + ((leftOver != 0) ? 1 : 0);

    *pWidth  = Block1K_3d[tableIdx].w << widthShift;
    *pHeight = Block1K_3d[tableIdx].h << heightShift;
    *pDepth  = Block1K_3d[tableIdx].d << depthShift;
}
/** /**
************************************************************************************************************************ ************************************************************************************************************************
* Lib::GetMipTailDim * Lib::GetMipTailDim
@ -1469,11 +1517,11 @@ Dim3d Lib::GetMipTailDim(
UINT_32 blockDepth) const UINT_32 blockDepth) const
{ {
Dim3d out = {blockWidth, blockHeight, blockDepth}; Dim3d out = {blockWidth, blockHeight, blockDepth};
UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
if (IsThick(resourceType, swizzleMode)) if (IsThick(resourceType, swizzleMode))
{ {
UINT_32 dim = log2blkSize % 3; UINT_32 dim = log2BlkSize % 3;
if (dim == 0) if (dim == 0)
{ {
@ -1490,11 +1538,22 @@ Dim3d Lib::GetMipTailDim(
} }
else else
{ {
if (log2blkSize & 1) ADDR_ASSERT(IsThin(resourceType, swizzleMode));
// GFX9/GFX10 use different dimension shrinking logic for mipmap tail: say for 128KB block + 2BPE, the maximum
// dimension of mipmap tail level will be [256W * 128H] on GFX9 ASICs and [128W * 256H] on GFX10 ASICs. Since
// GFX10 is newer HWL so we make its implementation into base class, in order to save future change on new HWLs.
// And assert log2BlkSize will always be an even value on GFX9, so we never need the logic wrapped by DEBUG...
#if DEBUG
if ((log2BlkSize & 1) && (m_chipFamily == ADDR_CHIP_FAMILY_AI))
{ {
// Should never go here...
ADDR_ASSERT_ALWAYS();
out.h >>= 1; out.h >>= 1;
} }
else else
#endif
{ {
out.w >>= 1; out.w >>= 1;
} }
@ -1873,7 +1932,52 @@ VOID Lib::ComputeQbStereoInfo(
pOut->pixelHeight <<= 1; pOut->pixelHeight <<= 1;
// Double size // Double size
pOut->surfSize <<= 1; pOut->surfSize <<= 1;
pOut->sliceSize <<= 1;
}
/**
************************************************************************************************************************
*   Lib::FilterInvalidEqSwizzleMode
*
*   @brief
*       Remove from the allowed set every swizzle mode whose equation lookup entry (for the given resource type and
*       element size) is ADDR_INVALID_EQUATION_INDEX. 1D resources are left untouched, and the set is also left
*       untouched if the filtering would empty it.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::FilterInvalidEqSwizzleMode(
    ADDR2_SWMODE_SET& allowedSwModeSet,
    AddrResourceType  resourceType,
    UINT_32           elemLog2
    ) const
{
    if (resourceType == ADDR_RSRC_TEX_1D)
    {
        return;
    }

    // The lookup table has no slot for 1D resources, hence the "- 1" on the resource type.
    const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(resourceType) - 1;

    UINT_32 filteredSet = allowedSwModeSet.value;
    UINT_32 swModeIdx   = 0;

    // Walk the originally allowed modes from bit 0 upwards; stop as soon as no allowed bit is left.
    for (UINT_32 pendingSet = allowedSwModeSet.value; pendingSet != 0; pendingSet >>= 1, swModeIdx++)
    {
        const BOOL_32 isAllowed = ((pendingSet & 1) != 0);

        if (isAllowed &&
            (m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] == ADDR_INVALID_EQUATION_INDEX))
        {
            filteredSet &= ~(1u << swModeIdx);
        }
    }

    // Only apply the filtering if at least one valid swizzle mode remains
    if (filteredSet != 0)
    {
        allowedSwModeSet.value = filteredSet;
    }
}
} // V2 } // V2

View file

@ -68,6 +68,8 @@ struct SwizzleModeFlags
UINT_32 isT : 1; // T mode UINT_32 isT : 1; // T mode
UINT_32 isRtOpt : 1; // mode opt for render target UINT_32 isRtOpt : 1; // mode opt for render target
UINT_32 reserved : 20; // Reserved bits
}; };
struct Dim2d struct Dim2d
@ -87,25 +89,14 @@ struct Dim3d
enum AddrBlockType enum AddrBlockType
{ {
AddrBlockMicro = 0, // Resource uses 256B block AddrBlockMicro = 0, // Resource uses 256B block
AddrBlock4KB = 1, // Resource uses 4KB block AddrBlockThin4KB = 1, // Resource uses thin 4KB block
AddrBlock64KB = 2, // Resource uses 64KB block AddrBlockThick4KB = 2, // Resource uses thick 4KB block
AddrBlockVar = 3, // Resource uses var block, only valid for GFX9 AddrBlockThin64KB = 3, // Resource uses thin 64KB block
AddrBlockLinear = 4, // Resource uses linear swizzle mode AddrBlockThick64KB = 4, // Resource uses thick 64KB block
AddrBlockVar = 5, // Resource uses var block, only valid for GFX9
AddrBlockLinear = 6, // Resource uses linear swizzle mode
AddrBlockMaxTiledType = AddrBlock64KB + 1, AddrBlockMaxTiledType = AddrBlockVar + 1,
};
enum AddrBlockSet
{
AddrBlockSetMicro = 1 << AddrBlockMicro,
AddrBlockSetMacro4KB = 1 << AddrBlock4KB,
AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
AddrBlockSetVar = 1 << AddrBlockVar,
AddrBlockSetLinear = 1 << AddrBlockLinear,
AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
AddrBlockSet2dGfx10 = AddrBlockSetMicro | AddrBlockSetMacro,
AddrBlockSet3dGfx10 = AddrBlockSetMacro,
}; };
enum AddrSwSet enum AddrSwSet
@ -115,11 +106,17 @@ enum AddrSwSet
AddrSwSetD = 1 << ADDR_SW_D, AddrSwSetD = 1 << ADDR_SW_D,
AddrSwSetR = 1 << ADDR_SW_R, AddrSwSetR = 1 << ADDR_SW_R,
AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR, AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
AddrSwSet3dThinGfx10 = AddrSwSetZ | AddrSwSetR,
AddrSwSetColorGfx10 = AddrSwSetS | AddrSwSetD | AddrSwSetR,
}; };
// Named size constants (256, 4096, 65536) and their base-2 logarithms (2^8, 2^12, 2^16).
// NOTE(review): presumably the 256B/4KB/64KB swizzle block sizes in bytes - confirm against the users.
const UINT_32 Size256 = 256u;
const UINT_32 Size4K = 4096u;
const UINT_32 Size64K = 65536u;
const UINT_32 Log2Size256 = 8u;
const UINT_32 Log2Size4K = 12u;
const UINT_32 Log2Size64K = 16u;
/** /**
************************************************************************************************************************ ************************************************************************************************************************
* @brief This class contains asic independent address lib functionalities * @brief This class contains asic independent address lib functionalities
@ -237,6 +234,15 @@ protected:
static const UINT_32 MaxMipLevels = 16; static const UINT_32 MaxMipLevels = 16;
BOOL_32 IsValidSwMode(AddrSwizzleMode swizzleMode) const
{
    // A swizzle mode counts as valid when its entry in m_swizzleModeTable has at least one bit set.
    // The entry is copied out with memcpy rather than read through a reinterpret_cast pointer,
    // so that strict-aliasing rules are not broken.
    UINT_32 rawFlags = 0;

    memcpy(&rawFlags, &m_swizzleModeTable[swizzleMode], sizeof(rawFlags));

    return rawFlags != 0;
}
// Checking block size // Checking block size
BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const
{ {
@ -356,7 +362,7 @@ protected:
{ {
blockSizeLog2 = 16; blockSizeLog2 = 16;
} }
else if (IsBlockVariable(swizzleMode)) else if (IsBlockVariable(swizzleMode) && (m_blockVarSizeLog2 != 0))
{ {
blockSizeLog2 = m_blockVarSizeLog2; blockSizeLog2 = m_blockVarSizeLog2;
} }
@ -653,12 +659,29 @@ protected:
AddrSwizzleMode swizzleMode) const; AddrSwizzleMode swizzleMode) const;
ADDR_E_RETURNCODE ComputeBlockDimension( ADDR_E_RETURNCODE ComputeBlockDimension(
UINT_32* pWidth, UINT_32* pWidth,
UINT_32* pHeight, UINT_32* pHeight,
UINT_32* pDepth, UINT_32* pDepth,
UINT_32 bpp, UINT_32 bpp,
AddrResourceType resourceType, AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const; AddrSwizzleMode swizzleMode) const;
virtual VOID ComputeThinBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
UINT_32 numSamples,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
VOID ComputeThickBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
static UINT_64 ComputePadSize( static UINT_64 ComputePadSize(
const Dim3d* pBlkDim, const Dim3d* pBlkDim,
@ -793,6 +816,11 @@ protected:
VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
VOID FilterInvalidEqSwizzleMode(
ADDR2_SWMODE_SET& allowedSwModeSet,
AddrResourceType resourceType,
UINT_32 elemLog2) const;
UINT_32 m_se; ///< Number of shader engine UINT_32 m_se; ///< Number of shader engine
UINT_32 m_rbPerSe; ///< Number of render backend per shader engine UINT_32 m_rbPerSe; ///< Number of render backend per shader engine
UINT_32 m_maxCompFrag; ///< Number of max compressed fragment UINT_32 m_maxCompFrag; ///< Number of max compressed fragment
@ -809,6 +837,22 @@ protected:
SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table
// Max number of swizzle mode supported for equation
static const UINT_32 MaxSwModeType = 32;
// Max number of resource type (2D/3D) supported for equation
static const UINT_32 MaxRsrcType = 2;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Almost all swizzle mode + resource type support equation
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwModeType * MaxRsrcType;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];
// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwModeType][MaxElementBytesLog2];
private: private:
// Disallow the copy constructor // Disallow the copy constructor
Lib(const Lib& a); Lib(const Lib& a);

View file

@ -119,7 +119,7 @@ VOID* Object::Alloc(
size_t objSize ///< [in] Size to allocate size_t objSize ///< [in] Size to allocate
) const ) const
{ {
return ClientAlloc(objSize, &m_client); return ClientAlloc(objSize, &m_client);;
} }
/** /**
@ -216,16 +216,20 @@ VOID Object::DebugPrint(
#if DEBUG #if DEBUG
if (m_client.callbacks.debugPrint != NULL) if (m_client.callbacks.debugPrint != NULL)
{ {
va_list ap;
va_start(ap, pDebugString);
ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};
debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT);
debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString); debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
debugPrintInput.hClient = m_client.handle; debugPrintInput.hClient = m_client.handle;
va_start(debugPrintInput.ap, pDebugString); va_copy(debugPrintInput.ap, ap);
m_client.callbacks.debugPrint(&debugPrintInput); m_client.callbacks.debugPrint(&debugPrintInput);
va_end(debugPrintInput.ap); va_end(ap);
} }
#endif #endif
} }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -36,6 +36,7 @@
#include "addrlib2.h" #include "addrlib2.h"
#include "coord.h" #include "coord.h"
#include "gfx10SwizzlePattern.h"
namespace Addr namespace Addr
{ {
@ -93,7 +94,11 @@ const UINT_32 Gfx10Blk64KBSwModeMask = (1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X); (1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx10ZSwModeMask = (1u << ADDR_SW_64KB_Z_X); const UINT_32 Gfx10BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z_X) |
(1u << ADDR_SW_VAR_R_X);
const UINT_32 Gfx10ZSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_VAR_Z_X);
const UINT_32 Gfx10StandardSwModeMask = (1u << ADDR_SW_256B_S) | const UINT_32 Gfx10StandardSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_4KB_S) | (1u << ADDR_SW_4KB_S) |
@ -109,14 +114,16 @@ const UINT_32 Gfx10DisplaySwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_4KB_D_X) | (1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X); (1u << ADDR_SW_64KB_D_X);
const UINT_32 Gfx10RenderSwModeMask = (1u << ADDR_SW_64KB_R_X); const UINT_32 Gfx10RenderSwModeMask = (1u << ADDR_SW_64KB_R_X) |
(1u << ADDR_SW_VAR_R_X);
const UINT_32 Gfx10XSwModeMask = (1u << ADDR_SW_4KB_S_X) | const UINT_32 Gfx10XSwModeMask = (1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_4KB_D_X) | (1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_Z_X) | (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) | (1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X); (1u << ADDR_SW_64KB_R_X) |
Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10TSwModeMask = (1u << ADDR_SW_64KB_S_T) | const UINT_32 Gfx10TSwModeMask = (1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_64KB_D_T); (1u << ADDR_SW_64KB_D_T);
@ -131,7 +138,8 @@ const UINT_32 Gfx10Rsrc1dSwModeMask = Gfx10LinearSwModeMask |
const UINT_32 Gfx10Rsrc2dSwModeMask = Gfx10LinearSwModeMask | const UINT_32 Gfx10Rsrc2dSwModeMask = Gfx10LinearSwModeMask |
Gfx10Blk256BSwModeMask | Gfx10Blk256BSwModeMask |
Gfx10Blk4KBSwModeMask | Gfx10Blk4KBSwModeMask |
Gfx10Blk64KBSwModeMask; Gfx10Blk64KBSwModeMask |
Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR) | const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) | (1u << ADDR_SW_4KB_S) |
@ -141,14 +149,23 @@ const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_64KB_Z_X) | (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) | (1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X); (1u << ADDR_SW_64KB_R_X) |
Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10Rsrc2dPrtSwModeMask = (Gfx10Blk4KBSwModeMask | Gfx10Blk64KBSwModeMask) & ~Gfx10XSwModeMask; const UINT_32 Gfx10Rsrc2dPrtSwModeMask = (Gfx10Blk4KBSwModeMask | Gfx10Blk64KBSwModeMask) & ~Gfx10XSwModeMask;
const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10DisplaySwModeMask; const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10DisplaySwModeMask;
const UINT_32 Gfx10Rsrc3dThinSwModeMask = (1u << ADDR_SW_64KB_Z_X) | const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_R_X); (1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask | Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10Rsrc3dThickSwModeMask = Gfx10Rsrc3dSwModeMask & ~(Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask);
const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk4KBSwModeMask;
const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask;
const UINT_32 Gfx10MsaaSwModeMask = Gfx10ZSwModeMask | const UINT_32 Gfx10MsaaSwModeMask = Gfx10ZSwModeMask |
Gfx10RenderSwModeMask; Gfx10RenderSwModeMask;
@ -290,6 +307,14 @@ protected:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
// Initialize equation table // Initialize equation table
VOID InitEquationTable(); VOID InitEquationTable();
@ -309,6 +334,7 @@ protected:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
private:
UINT_32 ComputeOffsetFromSwizzlePattern( UINT_32 ComputeOffsetFromSwizzlePattern(
const UINT_64* pPattern, const UINT_64* pPattern,
UINT_32 numBits, UINT_32 numBits,
@ -351,13 +377,6 @@ protected:
return compressBlkDim; return compressBlkDim;
} }
// Returns ceil(a / 2^b): a shifted right by b bits, rounded up when any of the
// b low-order bits of a is set.
//
// a - value to divide
// b - shift amount (log2 of the divisor); must be less than 32
static UINT_32 ShiftCeil(
    UINT_32 a,
    UINT_32 b)
{
    // Build the low-bit mask in unsigned arithmetic. A plain `1 << b` shifts a
    // signed int, which is undefined once the result no longer fits (b == 31).
    const UINT_32 lowBitsMask = (1u << b) - 1u;

    return (a >> b) + (((a & lowBitsMask) != 0) ? 1 : 0);
}
static void GetMipSize( static void GetMipSize(
UINT_32 mip0Width, UINT_32 mip0Width,
UINT_32 mip0Height, UINT_32 mip0Height,
@ -376,18 +395,39 @@ protected:
} }
} }
const UINT_64* GetSwizzlePattern( const ADDR_SW_PATINFO* GetSwizzlePatternInfo(
AddrSwizzleMode swizzleMode, AddrSwizzleMode swizzleMode,
AddrResourceType resourceType, AddrResourceType resourceType,
UINT_32 log2Elem, UINT_32 log2Elem,
UINT_32 numFrag) const; UINT_32 numFrag) const;
// Expands a pattern-info record into a flat 20-entry swizzle pattern.
//
// pPatInfo - supplies one row index per nibble table (nibble01Idx, nibble2Idx,
//            nibble3Idx, nibble4Idx)
// pSwizzle - output array; the selected rows are copied to entries 0, 8, 12 and 16
//
// NOTE(review): the destination offsets suggest row lengths of 8/4/4/4 entries;
// confirm against the table declarations in gfx10SwizzlePattern.h.
VOID GetSwizzlePatternFromPatternInfo(
    const ADDR_SW_PATINFO* pPatInfo,
    ADDR_BIT_SETTING       (&pSwizzle)[20]) const
{
    const ADDR_SW_PATINFO& patInfo = *pPatInfo;

    // Nibbles 0-1 start at entry 0
    memcpy(&pSwizzle[0],
           GFX10_SW_PATTERN_NIBBLE01[patInfo.nibble01Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE01[patInfo.nibble01Idx]));

    // Nibble 2 starts at entry 8
    memcpy(&pSwizzle[8],
           GFX10_SW_PATTERN_NIBBLE2[patInfo.nibble2Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE2[patInfo.nibble2Idx]));

    // Nibble 3 starts at entry 12
    memcpy(&pSwizzle[12],
           GFX10_SW_PATTERN_NIBBLE3[patInfo.nibble3Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE3[patInfo.nibble3Idx]));

    // Nibble 4 starts at entry 16
    memcpy(&pSwizzle[16],
           GFX10_SW_PATTERN_NIBBLE4[patInfo.nibble4Idx],
           sizeof(GFX10_SW_PATTERN_NIBBLE4[patInfo.nibble4Idx]));
}
VOID ConvertSwizzlePatternToEquation( VOID ConvertSwizzlePatternToEquation(
UINT_32 elemLog2, UINT_32 elemLog2,
AddrResourceType rsrcType, AddrResourceType rsrcType,
AddrSwizzleMode swMode, AddrSwizzleMode swMode,
const UINT_64* pPattern, const ADDR_SW_PATINFO* pPatInfo,
ADDR_EQUATION* pEquation) const; ADDR_EQUATION* pEquation) const;
static INT_32 GetMetaElementSizeLog2(Gfx10DataType dataType); static INT_32 GetMetaElementSizeLog2(Gfx10DataType dataType);
@ -429,14 +469,6 @@ protected:
BOOL_32 pipeAlign, BOOL_32 pipeAlign,
Dim3d* pBlock) const; Dim3d* pBlock) const;
// Returns TRUE when IsThick() reports the resource type / swizzle mode pair as
// thick, unless m_settings.supportRbPlus is set and the mode is ADDR_SW_64KB_D_X.
BOOL_32 IsEquationCompatibleThick(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    const BOOL_32 isThickMode = IsThick(resourceType, swizzleMode);

    // The single excluded combination: RB+ support together with 64KB_D_X
    const BOOL_32 excludedOnRbPlus = (m_settings.supportRbPlus != 0) &&
                                     (swizzleMode == ADDR_SW_64KB_D_X);

    return isThickMode && !excludedOnRbPlus;
}
INT_32 GetPipeRotateAmount( INT_32 GetPipeRotateAmount(
AddrResourceType resourceType, AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const; AddrSwizzleMode swizzleMode) const;
@ -460,61 +492,29 @@ protected:
} }
static const Dim3d Block256_3d[MaxNumOfBpp];
static const Dim3d Block64K_3d[MaxNumOfBpp];
static const Dim3d Block4K_3d[MaxNumOfBpp];
static const Dim3d Block64K_Log2_3d[MaxNumOfBpp];
static const Dim3d Block4K_Log2_3d[MaxNumOfBpp];
static const Dim2d Block64K_2d[MaxNumOfBpp];
static const Dim2d Block4K_2d[MaxNumOfBpp];
static const Dim2d Block64K_Log2_2d[MaxNumOfBpp];
static const Dim2d Block4K_Log2_2d[MaxNumOfBpp];
static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];
// Max number of swizzle mode supported for equation
static const UINT_32 MaxSwMode = 32;
// Max number of resource type (2D/3D) supported for equation
static const UINT_32 MaxRsrcType = 2;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Almost all swizzle mode + resource type support equation
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];
// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2];
// Number of packers log2
UINT_32 m_numPkrLog2;
// Number of shader array log2
UINT_32 m_numSaLog2;
private:
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const; UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const;
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet) static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
{ {
ADDR2_BLOCK_SET allowedBlockSet = {}; ADDR2_BLOCK_SET allowedBlockSet = {};
allowedBlockSet.micro = (allowedSwModeSet.value & Gfx10Blk256BSwModeMask) ? TRUE : FALSE; allowedBlockSet.micro = (allowedSwModeSet.value & Gfx10Blk256BSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macro4KB = (allowedSwModeSet.value & Gfx10Blk4KBSwModeMask) ? TRUE : FALSE; allowedBlockSet.linear = (allowedSwModeSet.value & Gfx10LinearSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macro64KB = (allowedSwModeSet.value & Gfx10Blk64KBSwModeMask) ? TRUE : FALSE; allowedBlockSet.var = (allowedSwModeSet.value & Gfx10BlkVarSwModeMask) ? TRUE : FALSE;
allowedBlockSet.linear = (allowedSwModeSet.value & Gfx10LinearSwModeMask) ? TRUE : FALSE;
if (rsrcType == ADDR_RSRC_TEX_3D)
{
allowedBlockSet.macroThick4KB = (allowedSwModeSet.value & Gfx10Rsrc3dThick4KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx10Rsrc3dThin64KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macroThick64KB = (allowedSwModeSet.value & Gfx10Rsrc3dThick64KBSwModeMask) ? TRUE : FALSE;
}
else
{
allowedBlockSet.macroThin4KB = (allowedSwModeSet.value & Gfx10Blk4KBSwModeMask) ? TRUE : FALSE;
allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx10Blk64KBSwModeMask) ? TRUE : FALSE;
}
return allowedBlockSet; return allowedBlockSet;
} }
@ -554,12 +554,26 @@ private:
BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
static const UINT_32 ColumnBits = 2; static const UINT_32 ColumnBits = 2;
static const UINT_32 BankBits = 4; static const UINT_32 BankBits = 4;
static const UINT_32 UnalignedDccType = 3;
static const Dim3d Block256_3d[MaxNumOfBpp];
static const Dim3d Block64K_Log2_3d[MaxNumOfBpp];
static const Dim3d Block4K_Log2_3d[MaxNumOfBpp];
static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];
// Number of packers log2
UINT_32 m_numPkrLog2;
// Number of shader array log2
UINT_32 m_numSaLog2;
Gfx10ChipSettings m_settings; Gfx10ChipSettings m_settings;
UINT_32 m_colorBaseIndex;
UINT_32 m_htileBaseIndex; UINT_32 m_colorBaseIndex;
UINT_32 m_xmaskBaseIndex;
UINT_32 m_dccBaseIndex;
}; };
} // V2 } // V2

View file

@ -69,51 +69,50 @@ namespace V2
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
{0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
{0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
{0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
{0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
{0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
{0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
{0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
{0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
}; };
const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
8, 6, 5, 4, 3, 2, 1, 0};
const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}}; const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
@ -130,8 +129,7 @@ const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2
*/ */
Gfx9Lib::Gfx9Lib(const Client* pClient) Gfx9Lib::Gfx9Lib(const Client* pClient)
: :
Lib(pClient), Lib(pClient)
m_numEquations(0)
{ {
m_class = AI_ADDRLIB; m_class = AI_ADDRLIB;
memset(&m_settings, 0, sizeof(m_settings)); memset(&m_settings, 0, sizeof(m_settings));
@ -281,8 +279,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
) const ) const
{ {
// TODO: Clarify with AddrLib team ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned, UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
pIn->swizzleMode); pIn->swizzleMode);
@ -687,7 +684,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
*/ */
UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
{ {
return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); return Size64K;
} }
/** /**
@ -722,7 +719,7 @@ UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
if (m_settings.metaBaseAlignFix) if (m_settings.metaBaseAlignFix)
{ {
maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
} }
if (m_settings.htileAlignFix) if (m_settings.htileAlignFix)
@ -745,7 +742,7 @@ UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
if (m_settings.metaBaseAlignFix) if (m_settings.metaBaseAlignFix)
{ {
maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
} }
return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
@ -1222,11 +1219,6 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
break; break;
} }
m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
if ((m_rbPerSeLog2 == 1) && if ((m_rbPerSeLog2 == 1) &&
(((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
@ -1241,6 +1233,9 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
m_settings.htileCacheRbConflict = 1; m_settings.htileCacheRbConflict = 1;
} }
} }
// For simplicity we never allow VAR swizzle mode for GFX9; the actual value is 18 on GFX9
m_blockVarSizeLog2 = 0;
} }
else else
{ {
@ -2164,6 +2159,7 @@ BOOL_32 Gfx9Lib::IsEquationSupported(
UINT_32 elementBytesLog2) const UINT_32 elementBytesLog2) const
{ {
BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) && BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
(IsValidSwMode(swMode) == TRUE) &&
(IsLinear(swMode) == FALSE) && (IsLinear(swMode) == FALSE) &&
(((IsTex2d(rsrcType) == TRUE) && (((IsTex2d(rsrcType) == TRUE) &&
((elementBytesLog2 < 4) || ((elementBytesLog2 < 4) ||
@ -2197,7 +2193,7 @@ VOID Gfx9Lib::InitEquationTable()
AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D); AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
// Loop all possible swizzle mode // Loop all possible swizzle mode
for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++) for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
{ {
AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx); AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
@ -2209,7 +2205,7 @@ VOID Gfx9Lib::InitEquationTable()
// Check if the input is supported // Check if the input is supported
if (IsEquationSupported(rsrcType, swMode, bppIdx)) if (IsEquationSupported(rsrcType, swMode, bppIdx))
{ {
ADDR_EQUATION equation; ADDR_EQUATION equation;
ADDR_E_RETURNCODE retCode; ADDR_E_RETURNCODE retCode;
memset(&equation, 0, sizeof(ADDR_EQUATION)); memset(&equation, 0, sizeof(ADDR_EQUATION));
@ -2954,14 +2950,10 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
case ADDR_SW_4KB_R: case ADDR_SW_4KB_R:
case ADDR_SW_64KB_D: case ADDR_SW_64KB_D:
case ADDR_SW_64KB_R: case ADDR_SW_64KB_R:
case ADDR_SW_VAR_D:
case ADDR_SW_VAR_R:
case ADDR_SW_4KB_D_X: case ADDR_SW_4KB_D_X:
case ADDR_SW_4KB_R_X: case ADDR_SW_4KB_R_X:
case ADDR_SW_64KB_D_X: case ADDR_SW_64KB_D_X:
case ADDR_SW_64KB_R_X: case ADDR_SW_64KB_R_X:
case ADDR_SW_VAR_D_X:
case ADDR_SW_VAR_R_X:
support = (pIn->bpp <= 64); support = (pIn->bpp <= 64);
break; break;
@ -2975,22 +2967,18 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
{ {
case ADDR_SW_4KB_D: case ADDR_SW_4KB_D:
case ADDR_SW_64KB_D: case ADDR_SW_64KB_D:
case ADDR_SW_VAR_D:
case ADDR_SW_64KB_D_T: case ADDR_SW_64KB_D_T:
case ADDR_SW_4KB_D_X: case ADDR_SW_4KB_D_X:
case ADDR_SW_64KB_D_X: case ADDR_SW_64KB_D_X:
case ADDR_SW_VAR_D_X:
support = (pIn->bpp == 64); support = (pIn->bpp == 64);
break; break;
case ADDR_SW_LINEAR: case ADDR_SW_LINEAR:
case ADDR_SW_4KB_S: case ADDR_SW_4KB_S:
case ADDR_SW_64KB_S: case ADDR_SW_64KB_S:
case ADDR_SW_VAR_S:
case ADDR_SW_64KB_S_T: case ADDR_SW_64KB_S_T:
case ADDR_SW_4KB_S_X: case ADDR_SW_4KB_S_X:
case ADDR_SW_64KB_S_X: case ADDR_SW_64KB_S_X:
case ADDR_SW_VAR_S_X:
support = (pIn->bpp <= 64); support = (pIn->bpp <= 64);
break; break;
@ -3210,7 +3198,7 @@ BOOL_32 Gfx9Lib::ValidateSwModeParams(
{ {
BOOL_32 valid = TRUE; BOOL_32 valid = TRUE;
if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE) if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
{ {
ADDR_ASSERT_ALWAYS(); ADDR_ASSERT_ALWAYS();
valid = FALSE; valid = FALSE;
@ -3229,7 +3217,6 @@ BOOL_32 Gfx9Lib::ValidateSwModeParams(
const AddrSwizzleMode swizzle = pIn->swizzleMode; const AddrSwizzleMode swizzle = pIn->swizzleMode;
const BOOL_32 linear = IsLinear(swizzle); const BOOL_32 linear = IsLinear(swizzle);
const BOOL_32 blk256B = IsBlock256b(swizzle); const BOOL_32 blk256B = IsBlock256b(swizzle);
const BOOL_32 blkVar = IsBlockVariable(swizzle);
const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
const ADDR2_SURFACE_FLAGS flags = pIn->flags; const ADDR2_SURFACE_FLAGS flags = pIn->flags;
@ -3337,11 +3324,6 @@ BOOL_32 Gfx9Lib::ValidateSwModeParams(
valid = FALSE; valid = FALSE;
} }
} }
else if (blkVar)
{
ADDR_ASSERT_ALWAYS();
valid = FALSE;
}
return valid; return valid;
} }
@ -3444,12 +3426,22 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (ValidateNonSwModeParams(&localIn)) if (ValidateNonSwModeParams(&localIn))
{ {
// Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9 // Forbid swizzle mode(s) by client setting
ADDR2_SWMODE_SET allowedSwModeSet = {}; ADDR2_SWMODE_SET allowedSwModeSet = {};
allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask; allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask; allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask; allowedSwModeSet.value |=
allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask; pIn->forbiddenBlock.macroThin4KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
allowedSwModeSet.value |=
pIn->forbiddenBlock.macroThick4KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
allowedSwModeSet.value |=
pIn->forbiddenBlock.macroThin64KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
allowedSwModeSet.value |=
pIn->forbiddenBlock.macroThick64KB ? 0 :
((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
if (pIn->preferredSwSet.value != 0) if (pIn->preferredSwSet.value != 0)
{ {
@ -3466,17 +3458,17 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (pIn->maxAlign > 0) if (pIn->maxAlign > 0)
{ {
if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) if (pIn->maxAlign < Size64K)
{ {
allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask; allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
} }
if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) if (pIn->maxAlign < Size4K)
{ {
allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask; allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
} }
if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) if (pIn->maxAlign < Size256)
{ {
allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask; allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
} }
@ -3583,7 +3575,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
} }
if ((numFrags > 1) && if ((numFrags > 1) &&
(GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) (Size4K < (m_pipeInterleaveBytes * numFrags)))
{ {
// MSAA surface must have blk_bytes/pipe_interleave >= num_samples // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask; allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
@ -3630,7 +3622,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
pOut->validSwModeSet = allowedSwModeSet; pOut->validSwModeSet = allowedSwModeSet;
pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE; pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet); pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet); pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
pOut->clientPreferredSwSet = pIn->preferredSwSet; pOut->clientPreferredSwSet = pIn->preferredSwSet;
@ -3640,6 +3632,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
pOut->clientPreferredSwSet.value = AddrSwSetAll; pOut->clientPreferredSwSet.value = AddrSwSetAll;
} }
// Apply optional restrictions
if (pIn->flags.needEquation)
{
FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
}
if (allowedSwModeSet.value == Gfx9LinearSwModeMask) if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
{ {
pOut->swizzleMode = ADDR_SW_LINEAR; pOut->swizzleMode = ADDR_SW_LINEAR;
@ -3649,15 +3647,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// Always ignore linear swizzle mode if there is other choice. // Always ignore linear swizzle mode if there is other choice.
allowedSwModeSet.swLinear = 0; allowedSwModeSet.swLinear = 0;
ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet); ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
// Determine block size if there is 2 or more block type candidates // Determine block size if there is 2 or more block type candidates
if (IsPow2(allowedBlockSet.value) == FALSE) if (IsPow2(allowedBlockSet.value) == FALSE)
{ {
const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB}; AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; swMode[AddrBlockMicro] = ADDR_SW_256B_D;
UINT_64 padSize[AddrBlockMaxTiledType] = {0}; swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
if (pOut->resourceType == ADDR_RSRC_TEX_3D)
{
swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
}
Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
UINT_64 padSize[AddrBlockMaxTiledType] = {0};
const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1); const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
@ -3683,7 +3692,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
} }
padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]); padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
if ((minSize == 0) || if ((minSize == 0) ||
((padSize[i] * ratioHi) <= (minSize * ratioLow))) ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
@ -3697,28 +3706,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if ((allowedBlockSet.micro == TRUE) && if ((allowedBlockSet.micro == TRUE) &&
(width <= blkDim[AddrBlockMicro].w) && (width <= blkDim[AddrBlockMicro].w) &&
(height <= blkDim[AddrBlockMicro].h) && (height <= blkDim[AddrBlockMicro].h) &&
(NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B))) (NextPow2(pIn->minSizeAlign) <= Size256))
{ {
minSizeBlk = AddrBlockMicro; minSizeBlk = AddrBlockMicro;
} }
if (minSizeBlk == AddrBlockMicro) if (minSizeBlk == AddrBlockMicro)
{ {
ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
allowedSwModeSet.value &= Gfx9Blk256BSwModeMask; allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
} }
else if (minSizeBlk == AddrBlock4KB) else if (minSizeBlk == AddrBlockThick4KB)
{ {
allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask; ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
}
else if (minSizeBlk == AddrBlockThin4KB)
{
allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
}
else if (minSizeBlk == AddrBlockThick64KB)
{
ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
} }
else else
{ {
ADDR_ASSERT(minSizeBlk == AddrBlock64KB); ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask; allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
} }
} }
// Block type should be determined. // Block type should be determined.
ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value)); ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
@ -3775,7 +3797,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
{ {
allowedSwModeSet.value &= Gfx9RotateSwModeMask; allowedSwModeSet.value &= Gfx9RotateSwModeMask;
} }
else if (displayRsrc && allowedSwSet.sw_D) else if (allowedSwSet.sw_D)
{ {
allowedSwModeSet.value &= Gfx9DisplaySwModeMask; allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
} }
@ -3794,8 +3816,8 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// Swizzle type should be determined. // Swizzle type should be determined.
ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
// Determine swizzle mode now - always select the "largest" swizzle mode for a given block type + // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
// swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9). // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value)); pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
} }
@ -3848,13 +3870,13 @@ ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2); const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
const UINT_32 bppLog2 = Log2(pIn->bpp >> 3); const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1; const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
ASSERTED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex]; const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
ADDR_ASSERT(maxYCoordBlock256 == ADDR_ASSERT(maxYCoordBlock256 ==
GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1)); GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
const UINT_32 maxYCoordInBaseEquation = const UINT_32 maxYCoordInBaseEquation =
(blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256; (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
ADDR_ASSERT(maxYCoordInBaseEquation == ADDR_ASSERT(maxYCoordInBaseEquation ==
GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1)); GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
@ -4548,7 +4570,7 @@ Dim3d Gfx9Lib::GetMipStartPos(
// Report mip in tail if Mip0 is already in mip tail // Report mip in tail if Mip0 is already in mip tail
BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth); BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
UINT_32 mipIndexInTail = mipId; UINT_32 mipIndexInTail = mipId;
if (inMipTail == FALSE) if (inMipTail == FALSE)
@ -4598,7 +4620,7 @@ Dim3d Gfx9Lib::GetMipStartPos(
if (IsThick(resourceType, swizzleMode)) if (IsThick(resourceType, swizzleMode))
{ {
UINT_32 dim = log2blkSize % 3; UINT_32 dim = log2BlkSize % 3;
if (dim == 0) if (dim == 0)
{ {
@ -4618,7 +4640,7 @@ Dim3d Gfx9Lib::GetMipStartPos(
} }
else else
{ {
if (log2blkSize & 1) if (log2BlkSize & 1)
{ {
inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1); inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
} }
@ -4648,7 +4670,7 @@ Dim3d Gfx9Lib::GetMipStartPos(
if (inMipTail) if (inMipTail)
{ {
UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize; UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32)); ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
*pMipTailBytesOffset = MipTailOffset256B[index] << 8; *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
} }
@ -4729,7 +4751,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
if (IsThin(pIn->resourceType, pIn->swizzleMode)) if (IsThin(pIn->resourceType, pIn->swizzleMode))
{ {
UINT_32 blockOffset = 0; UINT_32 blockOffset = 0;
UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
if (IsZOrderSwizzle(pIn->swizzleMode)) if (IsZOrderSwizzle(pIn->swizzleMode))
{ {
@ -4774,7 +4796,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8; MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
// Sample bits start location // Sample bits start location
UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples); UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
// Join sample bits information to the highest Macro block bits // Join sample bits information to the highest Macro block bits
if (IsNonPrtXor(pIn->swizzleMode)) if (IsNonPrtXor(pIn->swizzleMode))
{ {
@ -4787,7 +4809,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
// after this op, the blockOffset only contains log2 Macro block size bits // after this op, the blockOffset only contains log2 Macro block size bits
blockOffset %= (1 << sampleStart); blockOffset %= (1 << sampleStart);
blockOffset |= (pIn->sample << sampleStart); blockOffset |= (pIn->sample << sampleStart);
ADDR_ASSERT((blockOffset >> log2blkSize) == 0); ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
} }
} }
@ -4796,7 +4818,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
// Mask off bits above Macro block bits to keep page synonyms working for prt // Mask off bits above Macro block bits to keep page synonyms working for prt
if (IsPrt(pIn->swizzleMode)) if (IsPrt(pIn->swizzleMode))
{ {
blockOffset &= ((1 << log2blkSize) - 1); blockOffset &= ((1 << log2BlkSize) - 1);
} }
// Preserve offset inside pipe interleave // Preserve offset inside pipe interleave
@ -4804,13 +4826,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
blockOffset >>= m_pipeInterleaveLog2; blockOffset >>= m_pipeInterleaveLog2;
// Pipe/Se xor bits // Pipe/Se xor bits
pipeBits = GetPipeXorBits(log2blkSize); pipeBits = GetPipeXorBits(log2BlkSize);
// Pipe xor // Pipe xor
pipeXor = FoldXor2d(blockOffset, pipeBits); pipeXor = FoldXor2d(blockOffset, pipeBits);
blockOffset >>= pipeBits; blockOffset >>= pipeBits;
// Bank xor bits // Bank xor bits
bankBits = GetBankXorBits(log2blkSize); bankBits = GetBankXorBits(log2BlkSize);
// Bank Xor // Bank Xor
bankXor = FoldXor2d(blockOffset, bankBits); bankXor = FoldXor2d(blockOffset, bankBits);
blockOffset >>= bankBits; blockOffset >>= bankBits;
@ -4825,7 +4847,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
} }
ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
blockOffset |= mipTailBytesOffset; blockOffset |= mipTailBytesOffset;
@ -4840,7 +4862,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
bankBits, pipeBits, &blockOffset); bankBits, pipeBits, &blockOffset);
blockOffset %= (1 << log2blkSize); blockOffset %= (1 << log2BlkSize);
UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
@ -4850,11 +4872,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
((pIn->x / localOut.blockWidth) + mipStartPos.w); ((pIn->x / localOut.blockWidth) + mipStartPos.w);
pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
} }
else else
{ {
UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
Dim3d microBlockDim = Block1K_3d[log2ElementBytes]; Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
@ -4871,7 +4893,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
// Mask off bits above Macro block bits to keep page synonyms working for prt // Mask off bits above Macro block bits to keep page synonyms working for prt
if (IsPrt(pIn->swizzleMode)) if (IsPrt(pIn->swizzleMode))
{ {
blockOffset &= ((1 << log2blkSize) - 1); blockOffset &= ((1 << log2BlkSize) - 1);
} }
// Preserve offset inside pipe interleave // Preserve offset inside pipe interleave
@ -4879,13 +4901,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
blockOffset >>= m_pipeInterleaveLog2; blockOffset >>= m_pipeInterleaveLog2;
// Pipe/Se xor bits // Pipe/Se xor bits
pipeBits = GetPipeXorBits(log2blkSize); pipeBits = GetPipeXorBits(log2BlkSize);
// Pipe xor // Pipe xor
pipeXor = FoldXor3d(blockOffset, pipeBits); pipeXor = FoldXor3d(blockOffset, pipeBits);
blockOffset >>= pipeBits; blockOffset >>= pipeBits;
// Bank xor bits // Bank xor bits
bankBits = GetBankXorBits(log2blkSize); bankBits = GetBankXorBits(log2BlkSize);
// Bank Xor // Bank Xor
bankXor = FoldXor3d(blockOffset, bankBits); bankXor = FoldXor3d(blockOffset, bankBits);
blockOffset >>= bankBits; blockOffset >>= bankBits;
@ -4900,13 +4922,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
} }
ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
blockOffset |= mipTailBytesOffset; blockOffset |= mipTailBytesOffset;
returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
bankBits, pipeBits, &blockOffset); bankBits, pipeBits, &blockOffset);
blockOffset %= (1 << log2blkSize); blockOffset %= (1 << log2BlkSize);
UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w; UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h; UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
@ -4917,7 +4939,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
(localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
pOut->addr = blockOffset | (blockIndex << log2blkSize); pOut->addr = blockOffset | (blockIndex << log2BlkSize);
} }
} }
else else
@ -4996,5 +5018,59 @@ ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
return returnCode; return returnCode;
} }
/**
************************************************************************************************************************
*   Gfx9Lib::ComputeThinBlockDimension
*
*   @brief
*       Internal helper that derives the dimensions, in elements, of one thin block from the element size (bpp),
*       the sample count and the swizzle mode. Width and height start from the 256B micro block table entry for
*       the element size and are scaled up to the block size of the swizzle mode; depth is always reported as 1.
*       For multisampled input (numSamples > 1) width and height are then shrunk by log2(numSamples) bits in total.
*
*   @return
*       N/A (results are written through pWidth, pHeight and pDepth)
************************************************************************************************************************
*/
VOID Gfx9Lib::ComputeThinBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    UINT_32          numSamples,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThin(resourceType, swizzleMode));

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);

    // Row of the 256B micro block table that matches the element size in bytes.
    const UINT_32 bppTableIdx = Log2(bpp >> 3);

    ADDR_ASSERT(bppTableIdx < sizeof(Block256_2d) / sizeof(Block256_2d[0]));

    // Number of doublings needed to grow a 256B (2^8) micro block to the full block size.
    // They are split between the two axes; height receives the extra one when the count is odd.
    const UINT_32 doublings   = blkSizeLog2 - 8;
    const UINT_32 widthShift  = doublings / 2;
    const UINT_32 heightShift = doublings - widthShift;

    *pWidth  = (Block256_2d[bppTableIdx].w << widthShift);
    *pHeight = (Block256_2d[bppTableIdx].h << heightShift);
    *pDepth  = 1;

    if (numSamples > 1)
    {
        // Samples consume log2(numSamples) bits of the block: each axis gives up half of them, and the
        // leftover bit (odd log2) is taken from height for odd block sizes, from width otherwise.
        const UINT_32 sampleLog2 = Log2(numSamples);
        const UINT_32 evenShare  = sampleLog2 >> 1;
        const UINT_32 leftover   = sampleLog2 & 1;
        const UINT_32 oddBlkSize = blkSizeLog2 & 1;

        *pWidth  >>= oddBlkSize ? evenShare : (evenShare + leftover);
        *pHeight >>= oddBlkSize ? (evenShare + leftover) : evenShare;
    }
}
} // V2 } // V2
} // Addr } // Addr

View file

@ -114,49 +114,32 @@ const UINT_32 Gfx9Blk64KBSwModeMask = (1u << ADDR_SW_64KB_Z) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X); (1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx9BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z) |
(1u << ADDR_SW_VAR_S) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_VAR_R) |
(1u << ADDR_SW_VAR_Z_X) |
(1u << ADDR_SW_VAR_S_X) |
(1u << ADDR_SW_VAR_D_X) |
(1u << ADDR_SW_VAR_R_X);
const UINT_32 Gfx9ZSwModeMask = (1u << ADDR_SW_4KB_Z) | const UINT_32 Gfx9ZSwModeMask = (1u << ADDR_SW_4KB_Z) |
(1u << ADDR_SW_64KB_Z) | (1u << ADDR_SW_64KB_Z) |
(1u << ADDR_SW_VAR_Z) |
(1u << ADDR_SW_64KB_Z_T) | (1u << ADDR_SW_64KB_Z_T) |
(1u << ADDR_SW_4KB_Z_X) | (1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_64KB_Z_X) | (1u << ADDR_SW_64KB_Z_X);
(1u << ADDR_SW_VAR_Z_X);
const UINT_32 Gfx9StandardSwModeMask = (1u << ADDR_SW_256B_S) | const UINT_32 Gfx9StandardSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_4KB_S) | (1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) | (1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_VAR_S) |
(1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) | (1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X) | (1u << ADDR_SW_64KB_S_X);
(1u << ADDR_SW_VAR_S_X);
const UINT_32 Gfx9DisplaySwModeMask = (1u << ADDR_SW_256B_D) | const UINT_32 Gfx9DisplaySwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_4KB_D) | (1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) | (1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_64KB_D_T) | (1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) | (1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X);
(1u << ADDR_SW_VAR_D_X);
const UINT_32 Gfx9RotateSwModeMask = (1u << ADDR_SW_256B_R) | const UINT_32 Gfx9RotateSwModeMask = (1u << ADDR_SW_256B_R) |
(1u << ADDR_SW_4KB_R) | (1u << ADDR_SW_4KB_R) |
(1u << ADDR_SW_64KB_R) | (1u << ADDR_SW_64KB_R) |
(1u << ADDR_SW_VAR_R) |
(1u << ADDR_SW_64KB_R_T) | (1u << ADDR_SW_64KB_R_T) |
(1u << ADDR_SW_4KB_R_X) | (1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_R_X) | (1u << ADDR_SW_64KB_R_X);
(1u << ADDR_SW_VAR_R_X);
const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) | const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_4KB_S_X) | (1u << ADDR_SW_4KB_S_X) |
@ -165,11 +148,7 @@ const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_64KB_Z_X) | (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) | (1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X) | (1u << ADDR_SW_64KB_R_X);
(1u << ADDR_SW_VAR_Z_X) |
(1u << ADDR_SW_VAR_S_X) |
(1u << ADDR_SW_VAR_D_X) |
(1u << ADDR_SW_VAR_R_X);
const UINT_32 Gfx9TSwModeMask = (1u << ADDR_SW_64KB_Z_T) | const UINT_32 Gfx9TSwModeMask = (1u << ADDR_SW_64KB_Z_T) |
(1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_S_T) |
@ -197,6 +176,16 @@ const UINT_32 Gfx9Rsrc3dPrtSwModeMask = Gfx9Rsrc2dPrtSwModeMask & ~Gfx9RotateSwM
const UINT_32 Gfx9Rsrc3dThinSwModeMask = Gfx9DisplaySwModeMask & ~Gfx9Blk256BSwModeMask; const UINT_32 Gfx9Rsrc3dThinSwModeMask = Gfx9DisplaySwModeMask & ~Gfx9Blk256BSwModeMask;
// Subset of Gfx9Rsrc3dThinSwModeMask that is also in the 4KB block-size mask
const UINT_32 Gfx9Rsrc3dThin4KBSwModeMask = Gfx9Rsrc3dThinSwModeMask & Gfx9Blk4KBSwModeMask;
// Subset of Gfx9Rsrc3dThinSwModeMask that is also in the 64KB block-size mask
const UINT_32 Gfx9Rsrc3dThin64KBSwModeMask = Gfx9Rsrc3dThinSwModeMask & Gfx9Blk64KBSwModeMask;
// 3D-resource swizzle modes that are neither in the thin mask nor linear
const UINT_32 Gfx9Rsrc3dThickSwModeMask = Gfx9Rsrc3dSwModeMask & ~(Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask);
// Subset of Gfx9Rsrc3dThickSwModeMask that is also in the 4KB block-size mask
const UINT_32 Gfx9Rsrc3dThick4KBSwModeMask = Gfx9Rsrc3dThickSwModeMask & Gfx9Blk4KBSwModeMask;
// Subset of Gfx9Rsrc3dThickSwModeMask that is also in the 64KB block-size mask
const UINT_32 Gfx9Rsrc3dThick64KBSwModeMask = Gfx9Rsrc3dThickSwModeMask & Gfx9Blk64KBSwModeMask;
const UINT_32 Gfx9MsaaSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9LinearSwModeMask; const UINT_32 Gfx9MsaaSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9LinearSwModeMask;
const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) | const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) |
@ -204,14 +193,10 @@ const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_R) | (1u << ADDR_SW_4KB_R) |
(1u << ADDR_SW_64KB_D) | (1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_R) | (1u << ADDR_SW_64KB_R) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_VAR_R) |
(1u << ADDR_SW_4KB_D_X) | (1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_4KB_R_X) | (1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X) | (1u << ADDR_SW_64KB_R_X);
(1u << ADDR_SW_VAR_D_X) |
(1u << ADDR_SW_VAR_R_X);
const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) | const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_256B_R) | (1u << ADDR_SW_256B_R) |
@ -220,19 +205,14 @@ const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) |
const UINT_32 Dcn1NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | const UINT_32 Dcn1NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) | (1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) | (1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_VAR_S) |
(1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) | (1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X) | (1u << ADDR_SW_64KB_S_X);
(1u << ADDR_SW_VAR_S_X);
const UINT_32 Dcn1Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) | const UINT_32 Dcn1Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) | (1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_VAR_D) |
(1u << ADDR_SW_64KB_D_T) | (1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) | (1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_VAR_D_X) |
Dcn1NonBpp64SwModeMask; Dcn1NonBpp64SwModeMask;
/** /**
@ -273,9 +253,6 @@ public:
return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL; return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL;
} }
virtual BOOL_32 IsValidDisplaySwizzleMode(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
protected: protected:
Gfx9Lib(const Client* pClient); Gfx9Lib(const Client* pClient);
virtual ~Gfx9Lib(); virtual ~Gfx9Lib();
@ -376,22 +353,6 @@ protected:
AddrSwizzleMode swMode, AddrSwizzleMode swMode,
UINT_32 elementBytesLog2) const; UINT_32 elementBytesLog2) const;
// Base alignment of a tiled surface: the full block size of the swizzle mode when the mode is an
// xor mode, otherwise a fixed 256.
UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const
{
    return IsXor(swizzleMode) ? GetBlockSize(swizzleMode) : 256;
}
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const; ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const;
@ -423,6 +384,137 @@ protected:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
// Computes the width/height/depth, in elements, of one thin block for the given bpp, sample count
// and swizzle mode. Results are written through pWidth, pHeight and pDepth.
virtual VOID ComputeThinBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
UINT_32 numSamples,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
private:
VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;
VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const;
VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq,
UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2,
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;
VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2,
UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;
const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);
VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo,
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;
BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pMipmap0PaddedWidth,
UINT_32* pSlice0PaddedHeight,
ADDR2_MIP_INFO* pMipInfo = NULL) const;
// Translates a set of allowed swizzle modes into the set of block types those modes cover.
// For 3D resources the 4KB/64KB macro blocks are reported separately as thin and thick; for every
// other resource type only the thin 4KB/64KB bits are filled in and the thick bits stay cleared.
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
{
    ADDR2_BLOCK_SET blockSet = {};

    if (rsrcType != ADDR_RSRC_TEX_3D)
    {
        blockSet.macroThin4KB  = ((allowedSwModeSet.value & Gfx9Blk4KBSwModeMask)  != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB = ((allowedSwModeSet.value & Gfx9Blk64KBSwModeMask) != 0) ? TRUE : FALSE;
    }
    else
    {
        blockSet.macroThin4KB   = ((allowedSwModeSet.value & Gfx9Rsrc3dThin4KBSwModeMask)   != 0) ? TRUE : FALSE;
        blockSet.macroThick4KB  = ((allowedSwModeSet.value & Gfx9Rsrc3dThick4KBSwModeMask)  != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB  = ((allowedSwModeSet.value & Gfx9Rsrc3dThin64KBSwModeMask)  != 0) ? TRUE : FALSE;
        blockSet.macroThick64KB = ((allowedSwModeSet.value & Gfx9Rsrc3dThick64KBSwModeMask) != 0) ? TRUE : FALSE;
    }

    // Micro (256B) and linear do not depend on the resource type.
    blockSet.micro  = ((allowedSwModeSet.value & Gfx9Blk256BSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.linear = ((allowedSwModeSet.value & Gfx9LinearSwModeMask)  != 0) ? TRUE : FALSE;

    return blockSet;
}
// Translates a set of allowed swizzle modes into the set of swizzle types (Z / S / D / R) it covers:
// a type bit is set when at least one allowed mode belongs to the corresponding type mask.
static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    ADDR2_SWTYPE_SET swTypes = {};

    swTypes.sw_Z = ((allowedSwModeSet.value & Gfx9ZSwModeMask)        != 0) ? TRUE : FALSE;
    swTypes.sw_S = ((allowedSwModeSet.value & Gfx9StandardSwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_D = ((allowedSwModeSet.value & Gfx9DisplaySwModeMask)  != 0) ? TRUE : FALSE;
    swTypes.sw_R = ((allowedSwModeSet.value & Gfx9RotateSwModeMask)   != 0) ? TRUE : FALSE;

    return swTypes;
}
// Tells whether a mip of the given size fits inside the mip tail dimension.
// Width and height must always fit; depth is only checked when the resource/swizzle mode
// combination is not thin.
BOOL_32 IsInMipTail(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode,
    Dim3d            mipTailDim,
    UINT_32          width,
    UINT_32          height,
    UINT_32          depth) const
{
    const BOOL_32 fitsXY = ((width <= mipTailDim.w) && (height <= mipTailDim.h));

    BOOL_32 inTail = fitsXY;

    if (fitsXY && !IsThin(resourceType, swizzleMode))
    {
        // Not thin: the depth has to fit as well
        inTail = (depth <= mipTailDim.d);
    }

    return inTail;
}
BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
// Number of bank xor bits for a macro block of 2^macroBlockBits bytes: whatever is left of the
// block bits after the pipe interleave bits and the pipe xor bits, capped at m_banksLog2.
UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const
{
    const UINT_32 pipeXorBits = GetPipeXorBits(macroBlockBits);

    return Min(macroBlockBits - pipeXorBits - m_pipeInterleaveLog2, m_banksLog2);
}
// Base alignment of a tiled surface: the full block size of the swizzle mode when the mode is an
// xor mode, otherwise a fixed 256.
UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const
{
    return IsXor(swizzleMode) ? GetBlockSize(swizzleMode) : 256;
}
// Initialize equation table // Initialize equation table
VOID InitEquationTable(); VOID InitEquationTable();
@ -522,127 +614,17 @@ protected:
return compressBlkDim; return compressBlkDim;
} }
static const UINT_32 MaxSeLog2 = 3; static const UINT_32 MaxSeLog2 = 3;
static const UINT_32 MaxRbPerSeLog2 = 2; static const UINT_32 MaxRbPerSeLog2 = 2;
static const Dim3d Block256_3dS[MaxNumOfBpp]; static const Dim3d Block256_3dS[MaxNumOfBpp];
static const Dim3d Block256_3dZ[MaxNumOfBpp]; static const Dim3d Block256_3dZ[MaxNumOfBpp];
static const UINT_32 MipTailOffset256B[]; static const UINT_32 MipTailOffset256B[];
static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE]; static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];
// Max number of swizzle mode supported for equation static const UINT_32 MaxCachedMetaEq = 2;
// Number of swizzle mode slots in the equation lookup table
static const UINT_32 MaxSwMode = 32;
// Max number of resource types (2D/3D) supported for equations
static const UINT_32 MaxRsrcType = 2;
// Number of supported element sizes (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Capacity of the equation table: one entry per (element size, swizzle mode, resource type)
// combination, since almost all swizzle mode + resource type pairs support equations
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];
// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table, dimensioned as [resource type][swizzle mode][log2 of element bytes]
// NOTE(review): entries are presumably indices into m_equationTable - confirm against InitEquationTable()
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2];
static const UINT_32 MaxCachedMetaEq = 2;
private:
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);
VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;
VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const;
VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq,
UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2,
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;
VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2,
UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;
const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo,
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;
ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pMipmap0PaddedWidth,
UINT_32* pSlice0PaddedHeight,
ADDR2_MIP_INFO* pMipInfo = NULL) const;
// Translates a set of allowed swizzle modes into the set of block types those modes cover
// (256B micro, 4KB macro, 64KB macro, variable-size and linear): a block bit is set when at least
// one allowed mode belongs to the corresponding block mask.
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    ADDR2_BLOCK_SET blockSet = {};

    blockSet.micro     = ((allowedSwModeSet.value & Gfx9Blk256BSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.macro4KB  = ((allowedSwModeSet.value & Gfx9Blk4KBSwModeMask)  != 0) ? TRUE : FALSE;
    blockSet.macro64KB = ((allowedSwModeSet.value & Gfx9Blk64KBSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.var       = ((allowedSwModeSet.value & Gfx9BlkVarSwModeMask)  != 0) ? TRUE : FALSE;
    blockSet.linear    = ((allowedSwModeSet.value & Gfx9LinearSwModeMask)  != 0) ? TRUE : FALSE;

    return blockSet;
}
// Translates a set of allowed swizzle modes into the set of swizzle types (Z / S / D / R) it covers:
// a type bit is set when at least one allowed mode belongs to the corresponding type mask.
static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    ADDR2_SWTYPE_SET swTypes = {};

    swTypes.sw_Z = ((allowedSwModeSet.value & Gfx9ZSwModeMask)        != 0) ? TRUE : FALSE;
    swTypes.sw_S = ((allowedSwModeSet.value & Gfx9StandardSwModeMask) != 0) ? TRUE : FALSE;
    swTypes.sw_D = ((allowedSwModeSet.value & Gfx9DisplaySwModeMask)  != 0) ? TRUE : FALSE;
    swTypes.sw_R = ((allowedSwModeSet.value & Gfx9RotateSwModeMask)   != 0) ? TRUE : FALSE;

    return swTypes;
}
// Tells whether a mip of the given size fits inside the mip tail dimension.
// Width and height must always fit; depth is only checked when the resource/swizzle mode
// combination is not thin.
BOOL_32 IsInMipTail(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode,
    Dim3d            mipTailDim,
    UINT_32          width,
    UINT_32          height,
    UINT_32          depth) const
{
    const BOOL_32 fitsXY = ((width <= mipTailDim.w) && (height <= mipTailDim.h));

    BOOL_32 inTail = fitsXY;

    if (fitsXY && !IsThin(resourceType, swizzleMode))
    {
        // Not thin: the depth has to fit as well
        inTail = (depth <= mipTailDim.d);
    }

    return inTail;
}
BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
// Number of bank xor bits for a macro block of 2^macroBlockBits bytes: whatever is left of the
// block bits after the pipe interleave bits and the pipe xor bits, capped at m_banksLog2.
UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const
{
    const UINT_32 pipeXorBits = GetPipeXorBits(macroBlockBits);

    return Min(macroBlockBits - pipeXorBits - m_pipeInterleaveLog2, m_banksLog2);
}
Gfx9ChipSettings m_settings; Gfx9ChipSettings m_settings;

View file

@ -151,7 +151,6 @@ protected:
UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
private: private:
VOID ReadGbTileMode( VOID ReadGbTileMode(
UINT_32 regValue, TileConfig* pCfg) const; UINT_32 regValue, TileConfig* pCfg) const;

View file

@ -76,7 +76,6 @@ struct SiChipSettings
UINT_32 isSpectre : 1; UINT_32 isSpectre : 1;
UINT_32 isSpooky : 1; UINT_32 isSpooky : 1;
UINT_32 isKalindi : 1; UINT_32 isKalindi : 1;
// Hawaii is GFXIP 7.2
UINT_32 isHawaii : 1; UINT_32 isHawaii : 1;
// VI // VI

View file

@ -85,7 +85,6 @@ ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,
if (addrCreateInput.chipFamily >= FAMILY_AI) { if (addrCreateInput.chipFamily >= FAMILY_AI) {
addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND; addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
regValue.blockVarSizeLog2 = 0;
} else { } else {
regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3; regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2; regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
@ -1599,11 +1598,9 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
case ADDR_SW_256B_S: case ADDR_SW_256B_S:
case ADDR_SW_4KB_S: case ADDR_SW_4KB_S:
case ADDR_SW_64KB_S: case ADDR_SW_64KB_S:
case ADDR_SW_VAR_S:
case ADDR_SW_64KB_S_T: case ADDR_SW_64KB_S_T:
case ADDR_SW_4KB_S_X: case ADDR_SW_4KB_S_X:
case ADDR_SW_64KB_S_X: case ADDR_SW_64KB_S_X:
case ADDR_SW_VAR_S_X:
surf->micro_tile_mode = RADEON_MICRO_MODE_THIN; surf->micro_tile_mode = RADEON_MICRO_MODE_THIN;
break; break;
@ -1612,11 +1609,9 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
case ADDR_SW_256B_D: case ADDR_SW_256B_D:
case ADDR_SW_4KB_D: case ADDR_SW_4KB_D:
case ADDR_SW_64KB_D: case ADDR_SW_64KB_D:
case ADDR_SW_VAR_D:
case ADDR_SW_64KB_D_T: case ADDR_SW_64KB_D_T:
case ADDR_SW_4KB_D_X: case ADDR_SW_4KB_D_X:
case ADDR_SW_64KB_D_X: case ADDR_SW_64KB_D_X:
case ADDR_SW_VAR_D_X:
surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY; surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
break; break;
@ -1624,7 +1619,6 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
case ADDR_SW_256B_R: case ADDR_SW_256B_R:
case ADDR_SW_4KB_R: case ADDR_SW_4KB_R:
case ADDR_SW_64KB_R: case ADDR_SW_64KB_R:
case ADDR_SW_VAR_R:
case ADDR_SW_64KB_R_T: case ADDR_SW_64KB_R_T:
case ADDR_SW_4KB_R_X: case ADDR_SW_4KB_R_X:
case ADDR_SW_64KB_R_X: case ADDR_SW_64KB_R_X:
@ -1641,7 +1635,6 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
/* Z = depth. */ /* Z = depth. */
case ADDR_SW_4KB_Z: case ADDR_SW_4KB_Z:
case ADDR_SW_64KB_Z: case ADDR_SW_64KB_Z:
case ADDR_SW_VAR_Z:
case ADDR_SW_64KB_Z_T: case ADDR_SW_64KB_Z_T:
case ADDR_SW_4KB_Z_X: case ADDR_SW_4KB_Z_X:
case ADDR_SW_64KB_Z_X: case ADDR_SW_64KB_Z_X: