From e01266335b2d91b3945c8553710062d558eaeb31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 4 Mar 2024 01:16:08 -0500 Subject: [PATCH] amd: update addrlib Acked-by: Samuel Pitoiset Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/addrlib/inc/addrinterface.h | 41 +- src/amd/addrlib/src/addrinterface.cpp | 2 - src/amd/addrlib/src/amdgpu_asic_addr.h | 20 +- src/amd/addrlib/src/chip/r800/si_gb_reg.h | 43 ++ src/amd/addrlib/src/core/addrcommon.h | 18 - src/amd/addrlib/src/core/addrlib.cpp | 17 +- src/amd/addrlib/src/core/addrlib2.cpp | 6 +- src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 530 +++++++++++++++++--- src/amd/addrlib/src/gfx10/gfx10addrlib.h | 2 + src/amd/addrlib/src/gfx11/gfx11addrlib.cpp | 549 ++++++++++++++++++--- src/amd/addrlib/src/gfx11/gfx11addrlib.h | 2 + src/amd/addrlib/src/gfx9/gfx9addrlib.cpp | 3 - src/amd/addrlib/src/r800/siaddrlib.cpp | 11 +- src/amd/addrlib/src/r800/siaddrlib.h | 3 + 14 files changed, 1041 insertions(+), 206 deletions(-) diff --git a/src/amd/addrlib/inc/addrinterface.h b/src/amd/addrlib/inc/addrinterface.h index ab44a75aa62..769d91b344b 100644 --- a/src/amd/addrlib/inc/addrinterface.h +++ b/src/amd/addrlib/inc/addrinterface.h @@ -23,8 +23,8 @@ extern "C" { #endif -#define ADDRLIB_VERSION_MAJOR 8 -#define ADDRLIB_VERSION_MINOR 9 +#define ADDRLIB_VERSION_MAJOR 9 +#define ADDRLIB_VERSION_MINOR 0 #define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR) /// Virtually all interface functions need ADDR_HANDLE as first parameter @@ -33,6 +33,13 @@ typedef VOID* ADDR_HANDLE; /// Client handle used in callbacks typedef VOID* ADDR_CLIENT_HANDLE; +typedef struct _ADDR_EXTENT3D +{ + UINT_32 width; + UINT_32 height; + UINT_32 depth; // also slices for 2D images +} ADDR_EXTENT3D; + /** * ///////////////////////////////////////////////////////////////////////////////////////////////// * // Callback functions @@ -44,6 +51,8 @@ typedef VOID* ADDR_CLIENT_HANDLE; * typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( * const ADDR_DEBUGPRINT_INPUT* pInput); * +**/ +/** * ///////////////////////////////////////////////////////////////////////////////////////////////// * // Create/Destroy/Config functions * ///////////////////////////////////////////////////////////////////////////////////////////////// @@ -78,11 +87,15 @@ typedef VOID* ADDR_CLIENT_HANDLE; * AddrComputeFmaskAddrFromCoord() * AddrComputeFmaskCoordFromAddr() * +**/ +/** * ///////////////////////////////////////////////////////////////////////////////////////////////// * // Element/Utility functions * ///////////////////////////////////////////////////////////////////////////////////////////////// * ElemFlt32ToDepthPixel() * ElemFlt32ToColorPixel() +**/ +/** * AddrExtractBankPipeSwizzle() * AddrCombineBankPipeSwizzle() * AddrComputeSliceSwizzle() @@ -420,7 +433,6 @@ ADDR_E_RETURNCODE ADDR_API AddrDestroy( ADDR_HANDLE hLib); - //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1498,7 +1510,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); - //////////////////////////////////////////////////////////////////////////////////////////////////// // Element/utility functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1821,7 +1832,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut); - /** **************************************************************************************************** * ELEM_GETEXPORTNORM_INPUT @@ -2329,7 +2339,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); - /** **************************************************************************************************** * ADDR_GET_MAX_ALIGNMENTS_OUTPUT @@ -2395,7 +2404,7 @@ ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( //////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions for Gfx9 +// Surface functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -2449,7 +2458,7 @@ typedef struct _ADDR2_COMPUTE_SURFACE_INFO_INPUT UINT_32 size; ///< Size of this structure in bytes ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9 + AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Addr2 AddrResourceType resourceType; ///< Surface type AddrFormat format; ///< Surface format UINT_32 bpp; ///< bits per pixel @@ -2578,7 +2587,7 @@ typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT UINT_32 sample; ///< Sample index, use fragment index for EQAA UINT_32 mipId; ///< the mip ID in mip chain - AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 + AddrSwizzleMode swizzleMode; ///< Swizzle mode for Addr2 ADDR2_SURFACE_FLAGS flags; ///< Surface flags AddrResourceType resourceType; ///< Surface type UINT_32 bpp; ///< Bits per pixel @@ -2644,7 +2653,7 @@ typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, /// e.g. FMT_1; - AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 + AddrSwizzleMode swizzleMode; ///< Swizzle mode for Addr2 ADDR2_SURFACE_FLAGS flags; ///< Surface flags AddrResourceType resourceType; ///< Surface type UINT_32 bpp; ///< Bits per pixel @@ -2696,7 +2705,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( //////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions for Gfx9 +// HTile functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -2944,7 +2953,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( //////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions for Gfx9 +// C-mask functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -3169,7 +3178,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( //////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions for Gfx9 +// F-mask functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -3374,7 +3383,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( //////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions for Gfx9 +// DCC key functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -3552,7 +3561,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); //////////////////////////////////////////////////////////////////////////////////////////////////// -// Misc functions for Gfx9 +// Misc functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -3709,7 +3718,7 @@ typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT { UINT_32 size; ///< Size of this structure in bytes ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9 + AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Addr2 AddrResourceType resourceType; ///< Surface type AddrFormat format; ///< Surface format UINT_32 width; ///< Width of mip0 in texels (not in compressed block) diff --git a/src/amd/addrlib/src/addrinterface.cpp b/src/amd/addrlib/src/addrinterface.cpp index 8351c240fcd..504ce515a1d 100644 --- a/src/amd/addrlib/src/addrinterface.cpp +++ b/src/amd/addrlib/src/addrinterface.cpp @@ -79,7 +79,6 @@ ADDR_E_RETURNCODE ADDR_API AddrDestroy( } - //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -528,7 +527,6 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( } - /////////////////////////////////////////////////////////////////////////////// // Below functions are element related or helper functions /////////////////////////////////////////////////////////////////////////////// diff --git a/src/amd/addrlib/src/amdgpu_asic_addr.h b/src/amd/addrlib/src/amdgpu_asic_addr.h index 26f613b9266..86834de62b4 100644 --- a/src/amd/addrlib/src/amdgpu_asic_addr.h +++ b/src/amd/addrlib/src/amdgpu_asic_addr.h @@ -21,15 +21,14 @@ #define FAMILY_CI 0x78 //# 120 / Sea Islands: Bonaire, Hawaii #define FAMILY_KV 0x7D //# 125 / Kaveri APUs: Spectre, Spooky, Kalindi, Godavari #define FAMILY_VI 0x82 //# 130 / Volcanic Islands: Iceland, Tonga, Fiji -#define FAMILY_POLARIS 0x82 //# 130 / Polaris: 10, 11, 12 #define FAMILY_CZ 0x87 //# 135 / Carrizo APUs: Carrizo, Stoney #define FAMILY_AI 0x8D //# 141 / Vega: 10, 20 #define FAMILY_RV 0x8E //# 142 / Raven #define FAMILY_NV 0x8F //# 143 / Navi: 10 #define FAMILY_VGH 0x90 //# 144 / Van Gogh #define FAMILY_NV3 0x91 //# 145 / Navi: 3x -#define FAMILY_GFX1103 0x94 #define FAMILY_GFX1150 0x96 +#define FAMILY_GFX1103 0x94 #define FAMILY_RMB 0x92 //# 146 / Rembrandt #define FAMILY_RPL 0x95 //# 149 / Raphael #define FAMILY_MDN 0x97 //# 151 / Mendocino @@ -46,8 +45,8 @@ #define FAMILY_IS_AI(f) FAMILY_IS(f, AI) #define FAMILY_IS_RV(f) FAMILY_IS(f, RV) #define FAMILY_IS_NV(f) FAMILY_IS(f, NV) -#define FAMILY_IS_NV3(f) FAMILY_IS(f, NV3) #define FAMILY_IS_RMB(f) FAMILY_IS(f, RMB) +#define FAMILY_IS_NV3(f) FAMILY_IS(f, NV3) #define AMDGPU_UNKNOWN 0xFF @@ -99,16 +98,13 @@ #define AMDGPU_NAVI31_RANGE 0x01, 0x10 //# 01 <= x < 16 #define AMDGPU_NAVI32_RANGE 0x20, 0xFF //# 32 <= x < 255 #define AMDGPU_NAVI33_RANGE 0x10, 0x20 //# 16 <= x < 32 -#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x80 //# 1 <= x < 128 -#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xFF //# 128 <= x < max - #define AMDGPU_GFX1150_RANGE 0x01, 0x40 //# 1 <= x < 64 -#define AMDGPU_GFX1151_RANGE 0xC0, 0xFF //# 192 <= x < max +#define AMDGPU_GFX1151_RANGE 0xC0, 0xFF //# 192 <= x < 255 +#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x80 //# 1 <= x < 128 +#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xC0 //# 128 <= x < 192 #define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF //# 01 <= x < 255 - #define AMDGPU_RAPHAEL_RANGE 0x01, 0xFF //# 1 <= x < max - #define AMDGPU_MENDOCINO_RANGE 0x01, 0xFF //# 1 <= x < max #define AMDGPU_EXPAND_FIX(x) x @@ -174,15 +170,13 @@ #define ASICREV_IS_NAVI31_P(r) ASICREV_IS(r, NAVI31) #define ASICREV_IS_NAVI32_P(r) ASICREV_IS(r, NAVI32) #define ASICREV_IS_NAVI33_P(r) ASICREV_IS(r, NAVI33) -#define ASICREV_IS_GFX1103_R1(r) ASICREV_IS(r, GFX1103_R1) -#define ASICREV_IS_GFX1103_R2(r) ASICREV_IS(r, GFX1103_R2) #define ASICREV_IS_GFX1150(r) ASICREV_IS(r, GFX1150) #define ASICREV_IS_GFX1151(r) ASICREV_IS(r, GFX1151) +#define ASICREV_IS_GFX1103_R1(r) ASICREV_IS(r, GFX1103_R1) +#define ASICREV_IS_GFX1103_R2(r) ASICREV_IS(r, GFX1103_R2) #define ASICREV_IS_REMBRANDT(r) ASICREV_IS(r, REMBRANDT) - #define ASICREV_IS_RAPHAEL(r) ASICREV_IS(r, RAPHAEL) - #define ASICREV_IS_MENDOCINO(r) ASICREV_IS(r, MENDOCINO) #endif // _AMDGPU_ASIC_ADDR_H diff --git a/src/amd/addrlib/src/chip/r800/si_gb_reg.h b/src/amd/addrlib/src/chip/r800/si_gb_reg.h index 329c5ac5067..c5bb578f98a 100644 --- a/src/amd/addrlib/src/chip/r800/si_gb_reg.h +++ b/src/amd/addrlib/src/chip/r800/si_gb_reg.h @@ -76,9 +76,52 @@ #endif +#if defined(LITTLEENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_N { + unsigned int num_pipes : 3; + unsigned int pipe_interleave_size : 3; + unsigned int max_compressed_frags : 2; + unsigned int bank_interleave_size : 3; + unsigned int : 1; + unsigned int num_banks : 3; + unsigned int : 1; + unsigned int shader_engine_tile_size : 3; + unsigned int num_shader_engines : 2; + unsigned int num_gpus : 3; + unsigned int multi_gpu_tile_size : 2; + unsigned int num_rb_per_se : 2; + unsigned int row_size : 2; + unsigned int num_lower_pipes : 1; + unsigned int se_enable : 1; + } GB_ADDR_CONFIG_N; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_N { + unsigned int se_enable : 1; + unsigned int num_lower_pipes : 1; + unsigned int row_size : 2; + unsigned int num_rb_per_se : 2; + unsigned int multi_gpu_tile_size : 2; + unsigned int num_gpus : 3; + unsigned int num_shader_engines : 2; + unsigned int shader_engine_tile_size : 3; + unsigned int : 1; + unsigned int num_banks : 3; + unsigned int : 1; + unsigned int bank_interleave_size : 3; + unsigned int max_compressed_frags : 2; + unsigned int pipe_interleave_size : 3; + unsigned int num_pipes : 3; + } GB_ADDR_CONFIG_N; + +#endif + typedef union { unsigned int val : 32; GB_ADDR_CONFIG_T f; + GB_ADDR_CONFIG_N n; } GB_ADDR_CONFIG; #if defined(LITTLEENDIAN_CPU) diff --git a/src/amd/addrlib/src/core/addrcommon.h b/src/amd/addrlib/src/core/addrcommon.h index 28770bf8fec..71231e0a87c 100644 --- a/src/amd/addrlib/src/core/addrcommon.h +++ b/src/amd/addrlib/src/core/addrcommon.h @@ -659,24 +659,6 @@ static inline VOID SafeAssign( } } -/** -**************************************************************************************************** -* SafeAssign -* -* @brief -* NULL pointer safe assignment for AddrTileMode -**************************************************************************************************** -*/ -static inline VOID SafeAssign( - AddrTileMode* pLVal, ///< [in] Pointer to left val - AddrTileMode rVal) ///< [in] Right value -{ - if (pLVal) - { - *pLVal = rVal; - } -} - /** **************************************************************************************************** * RoundHalf diff --git a/src/amd/addrlib/src/core/addrlib.cpp b/src/amd/addrlib/src/core/addrlib.cpp index 407a1b38a5e..1cb1f9243b9 100644 --- a/src/amd/addrlib/src/core/addrlib.cpp +++ b/src/amd/addrlib/src/core/addrlib.cpp @@ -190,10 +190,10 @@ ADDR_E_RETURNCODE Lib::Create( case FAMILY_SI: pLib = SiHwlInit(&client); break; - case FAMILY_VI: - case FAMILY_CZ: // VI based fusion case FAMILY_CI: case FAMILY_KV: // CI based fusion + case FAMILY_VI: + case FAMILY_CZ: // VI based fusion pLib = CiHwlInit(&client); break; default: @@ -216,8 +216,8 @@ ADDR_E_RETURNCODE Lib::Create( pLib = Gfx10HwlInit(&client); break; case FAMILY_NV3: - case FAMILY_GFX1103: case FAMILY_GFX1150: + case FAMILY_GFX1103: pLib = Gfx11HwlInit(&client); break; default: @@ -231,6 +231,10 @@ ADDR_E_RETURNCODE Lib::Create( } } + if(pLib == NULL) + { + returnCode = ADDR_OUTOFMEMORY; + } if (pLib != NULL) { BOOL_32 initValid; @@ -269,6 +273,7 @@ ADDR_E_RETURNCODE Lib::Create( { delete pLib; pLib = NULL; + returnCode = ADDR_OUTOFMEMORY; ADDR_ASSERT_ALWAYS(); } else @@ -288,12 +293,6 @@ ADDR_E_RETURNCODE Lib::Create( pLib->SetMaxAlignments(); } - else if ((pLib == NULL) && - (returnCode == ADDR_OK)) - { - // Unknown failures, we return the general error code - returnCode = ADDR_ERROR; - } return returnCode; } diff --git a/src/amd/addrlib/src/core/addrlib2.cpp b/src/amd/addrlib/src/core/addrlib2.cpp index 55a87cdd154..11f1ba40bdc 100644 --- a/src/amd/addrlib/src/core/addrlib2.cpp +++ b/src/amd/addrlib/src/core/addrlib2.cpp @@ -115,13 +115,15 @@ Lib* Lib::GetLib( ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE { Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); + if ((pAddrLib != NULL) && (pAddrLib->GetChipFamily() <= ADDR_CHIP_FAMILY_VI)) { - // only valid and GFX9+ ASIC can use AddrLib2 function. + // only GFX9+ ASIC can use AddrLib2 function. ADDR_ASSERT_ALWAYS(); hLib = NULL; } + return static_cast(hLib); } @@ -1170,6 +1172,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); localIn.bpp = pIn->bpp; + localIn.swizzleMode = pIn->swizzleMode; localIn.flags = pIn->flags; localIn.width = Max(pIn->unalignedWidth, 1u); localIn.height = Max(pIn->unalignedHeight, 1u); @@ -1259,6 +1262,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrLinear( ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; localIn.bpp = pIn->bpp; + localIn.swizzleMode = pIn->swizzleMode; localIn.flags = pIn->flags; localIn.width = Max(pIn->unalignedWidth, 1u); localIn.height = Max(pIn->unalignedHeight, 1u); diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp index ae080bd9a4a..f79289e1136 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp @@ -1058,7 +1058,6 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( ADDR_ASSERT(!"Unknown chip revision"); } break; - case FAMILY_RMB: if (ASICREV_IS_REMBRANDT(chipRevision)) { @@ -1467,7 +1466,6 @@ VOID Gfx10Lib::ConvertSwizzlePatternToEquation( if (IsXor(swMode) == FALSE) { - // Use simplified logic when we only have one bit-component for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) { ADDR_ASSERT(IsPow2(pSwizzle[i].value)); @@ -1497,87 +1495,475 @@ VOID Gfx10Lib::ConvertSwizzlePatternToEquation( pEquation->addr[i].valid = 1; pEquation->addr[i].index = Log2(pSwizzle[i].z); } + + pEquation->xor1[i].value = 0; + pEquation->xor2[i].value = 0; } } - else + else if (IsThin(rsrcType, swMode)) { Dim3d dim; - ComputeBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, rsrcType, swMode); + ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode); const UINT_32 blkXLog2 = Log2(dim.w); const UINT_32 blkYLog2 = Log2(dim.h); - const UINT_32 blkZLog2 = Log2(dim.d); const UINT_32 blkXMask = dim.w - 1; const UINT_32 blkYMask = dim.h - 1; - const UINT_32 blkZMask = dim.d - 1; ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {}; - memcpy(&swizzle, pSwizzle, sizeof(swizzle)); UINT_32 xMask = 0; UINT_32 yMask = 0; - UINT_32 zMask = 0; + UINT_32 bMask = (1 << elemLog2) - 1; for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) { - for (UINT_32 bitComp = 0; bitComp < ADDR_MAX_EQUATION_COMP; bitComp++) + if (IsPow2(pSwizzle[i].value)) { - if (swizzle[i].value == 0) + if (pSwizzle[i].x != 0) { - ADDR_ASSERT(bitComp != 0); // Bits above element size must have at least one addr-bit - ADDR_ASSERT(bitComp <= pPatInfo->maxItemCount); - break; - } + ADDR_ASSERT((xMask & pSwizzle[i].x) == 0); + xMask |= pSwizzle[i].x; - if (swizzle[i].x != 0) - { - const UINT_32 xLog2 = BitScanForward(swizzle[i].x); - swizzle[i].x = UnsetLeastBit(swizzle[i].x); - xMask |= (1 << xLog2); + const UINT_32 xLog2 = Log2(pSwizzle[i].x); - pEquation->comps[bitComp][i].channel = 0; - pEquation->comps[bitComp][i].valid = 1; - pEquation->comps[bitComp][i].index = xLog2 + elemLog2; - } - else if (swizzle[i].y != 0) - { - const UINT_32 yLog2 = BitScanForward(swizzle[i].y); - swizzle[i].y = UnsetLeastBit(swizzle[i].y); - yMask |= (1 << yLog2); + ADDR_ASSERT(xLog2 < blkXLog2); - pEquation->comps[bitComp][i].channel = 1; - pEquation->comps[bitComp][i].valid = 1; - pEquation->comps[bitComp][i].index = yLog2; - } - else if (swizzle[i].z != 0) - { - const UINT_32 zLog2 = BitScanForward(swizzle[i].z); - swizzle[i].z = UnsetLeastBit(swizzle[i].z); - zMask |= (1 << zLog2); - - pEquation->comps[bitComp][i].channel = 2; - pEquation->comps[bitComp][i].valid = 1; - pEquation->comps[bitComp][i].index = zLog2; + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; } else { - // This function doesn't handle MSAA (must update block dims, here, and consumers) - ADDR_ASSERT_ALWAYS(); + ADDR_ASSERT(pSwizzle[i].y != 0); + ADDR_ASSERT((yMask & pSwizzle[i].y) == 0); + yMask |= pSwizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + if (pSwizzle[i].z != 0) + { + ADDR_ASSERT(IsPow2(static_cast(pSwizzle[i].z))); + + pEquation->xor2[i].channel = 2; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(pSwizzle[i].z); + } + + swizzle[i].x = pSwizzle[i].x; + swizzle[i].y = pSwizzle[i].y; + swizzle[i].z = swizzle[i].s = 0; + + ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE); + + const UINT_32 xHi = swizzle[i].x & (~blkXMask); + + if (xHi != 0) + { + ADDR_ASSERT(IsPow2(xHi)); + ADDR_ASSERT(pEquation->xor1[i].value == 0); + + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(xHi) + elemLog2; + + swizzle[i].x &= blkXMask; + } + + const UINT_32 yHi = swizzle[i].y & (~blkYMask); + + if (yHi != 0) + { + ADDR_ASSERT(IsPow2(yHi)); + + if (xHi == 0) + { + ADDR_ASSERT(pEquation->xor1[i].value == 0); + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(yHi); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(yHi); + } + + swizzle[i].y &= blkYMask; + } + + if (swizzle[i].value == 0) + { + bMask |= 1 << i; } } - ADDR_ASSERT(swizzle[i].value == 0); // We missed an xor? Are there too many? } - // We missed an address bit for coords inside the block? - // That means two coords will land on the same addr, which is bad. - ADDR_ASSERT(((xMask & blkXMask) == blkXMask) && - ((yMask & blkYMask) == blkYMask) && - ((zMask & blkZMask) == blkZMask)); - // We're sourcing from outside our block? That won't fly for PRTs, which need to be movable. - // Non-xor modes can also be used for 2D PRTs but they're handled in the simplified logic above. - ADDR_ASSERT((IsPrt(swMode) == false) || - ((xMask == blkXMask) && - (yMask == blkYMask) && - (zMask == blkZMask))); + const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1; + const UINT_32 blockMask = (1 << blockSizeLog2) - 1; + + ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask); + + while (bMask != blockMask) + { + for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++) + { + if ((bMask & (1 << i)) == 0) + { + if (IsPow2(swizzle[i].value)) + { + if (swizzle[i].x != 0) + { + ADDR_ASSERT((xMask & swizzle[i].x) == 0); + xMask |= swizzle[i].x; + + const UINT_32 xLog2 = Log2(swizzle[i].x); + + ADDR_ASSERT(xLog2 < blkXLog2); + + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; + } + else + { + ADDR_ASSERT(swizzle[i].y != 0); + ADDR_ASSERT((yMask & swizzle[i].y) == 0); + yMask |= swizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(swizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + const UINT_32 x = swizzle[i].x & xMask; + const UINT_32 y = swizzle[i].y & yMask; + + if (x != 0) + { + ADDR_ASSERT(IsPow2(x)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(x) + elemLog2; + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 0; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(x) + elemLog2; + } + } + + if (y != 0) + { + ADDR_ASSERT(IsPow2(y)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(y); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(y); + } + } + + swizzle[i].x &= ~x; + swizzle[i].y &= ~y; + } + } + } + } + + ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask)); + } + else + { + const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w; + const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h; + const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d; + const UINT_32 blkXMask = (1 << blkXLog2) - 1; + const UINT_32 blkYMask = (1 << blkYLog2) - 1; + const UINT_32 blkZMask = (1 << blkZLog2) - 1; + + ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {}; + UINT_32 xMask = 0; + UINT_32 yMask = 0; + UINT_32 zMask = 0; + UINT_32 bMask = (1 << elemLog2) - 1; + + for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) + { + if (IsPow2(pSwizzle[i].value)) + { + if (pSwizzle[i].x != 0) + { + ADDR_ASSERT((xMask & pSwizzle[i].x) == 0); + xMask |= pSwizzle[i].x; + + const UINT_32 xLog2 = Log2(pSwizzle[i].x); + + ADDR_ASSERT(xLog2 < blkXLog2); + + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; + } + else if (pSwizzle[i].y != 0) + { + ADDR_ASSERT((yMask & pSwizzle[i].y) == 0); + yMask |= pSwizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + else + { + ADDR_ASSERT(pSwizzle[i].z != 0); + ADDR_ASSERT((zMask & pSwizzle[i].z) == 0); + zMask |= pSwizzle[i].z; + + pEquation->addr[i].channel = 2; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].z); + + ADDR_ASSERT(pEquation->addr[i].index < blkZLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + swizzle[i].x = pSwizzle[i].x; + swizzle[i].y = pSwizzle[i].y; + swizzle[i].z = pSwizzle[i].z; + swizzle[i].s = 0; + + ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE); + + const UINT_32 xHi = swizzle[i].x & (~blkXMask); + const UINT_32 yHi = swizzle[i].y & (~blkYMask); + const UINT_32 zHi = swizzle[i].z & (~blkZMask); + + ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0)); + + if (xHi != 0) + { + ADDR_ASSERT(IsPow2(xHi)); + ADDR_ASSERT(pEquation->xor1[i].value == 0); + + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(xHi) + elemLog2; + + swizzle[i].x &= blkXMask; + } + + if (yHi != 0) + { + ADDR_ASSERT(IsPow2(yHi)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(yHi); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(yHi); + } + + swizzle[i].y &= blkYMask; + } + + if (zHi != 0) + { + ADDR_ASSERT(IsPow2(zHi)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 2; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(zHi); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 2; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(zHi); + } + + swizzle[i].z &= blkZMask; + } + + if (swizzle[i].value == 0) + { + bMask |= 1 << i; + } + } + } + + const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1; + const UINT_32 blockMask = (1 << blockSizeLog2) - 1; + + ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask); + + while (bMask != blockMask) + { + for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++) + { + if ((bMask & (1 << i)) == 0) + { + if (IsPow2(swizzle[i].value)) + { + if (swizzle[i].x != 0) + { + ADDR_ASSERT((xMask & swizzle[i].x) == 0); + xMask |= swizzle[i].x; + + const UINT_32 xLog2 = Log2(swizzle[i].x); + + ADDR_ASSERT(xLog2 < blkXLog2); + + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; + } + else if (swizzle[i].y != 0) + { + ADDR_ASSERT((yMask & swizzle[i].y) == 0); + yMask |= swizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(swizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + else + { + ADDR_ASSERT(swizzle[i].z != 0); + ADDR_ASSERT((zMask & swizzle[i].z) == 0); + zMask |= swizzle[i].z; + + pEquation->addr[i].channel = 2; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(swizzle[i].z); + + ADDR_ASSERT(pEquation->addr[i].index < blkZLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + const UINT_32 x = swizzle[i].x & xMask; + const UINT_32 y = swizzle[i].y & yMask; + const UINT_32 z = swizzle[i].z & zMask; + + if (x != 0) + { + ADDR_ASSERT(IsPow2(x)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(x) + elemLog2; + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 0; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(x) + elemLog2; + } + } + + if (y != 0) + { + ADDR_ASSERT(IsPow2(y)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(y); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(y); + } + } + + if (z != 0) + { + ADDR_ASSERT(IsPow2(z)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 2; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(z); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 2; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(z); + } + } + + swizzle[i].x &= ~x; + swizzle[i].y &= ~y; + swizzle[i].z &= ~z; + } + } + } + } + + ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask)); } } @@ -1620,17 +2006,28 @@ VOID Gfx10Lib::InitEquationTable() if (pPatInfo != NULL) { ADDR_ASSERT(IsValidSwMode(swMode)); - ADDR_EQUATION equation = {}; + if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex + { + ADDR_EQUATION equation = {}; - // Passing in pPatInfo to get the addr equation - ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); + // Passing in pPatInfo to get the addr equation + ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); - equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); - // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo - m_equationTable[equationIndex] = equation; - // Increment m_numEquations - m_numEquations++; + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo + m_equationTable[equationIndex] = equation; + // Increment m_numEquations + m_numEquations++; + } + else // There is no equationIndex + { + // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case + ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4)); + ADDR_ASSERT(rsrcTypeIdx == 1); + ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X); + ADDR_ASSERT(m_settings.supportRbPlus == 1); + } } // equationIndex, which is used to look up equations in m_equationTable, will be cached for every // iteration in this nested for-loop @@ -2254,7 +2651,7 @@ BOOL_32 Gfx10Lib::ValidateSwModeParams( { if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) || (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) || - (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0))) + (thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0))) { ADDR_ASSERT_ALWAYS(); valid = FALSE; @@ -2571,7 +2968,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.view3dAs2dArray) { - allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask; + // SW_LINEAR can be used for 3D thin images, including BCn image format. + allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask; } break; diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.h b/src/amd/addrlib/src/gfx10/gfx10addrlib.h index f13dcb2f399..a8421e92ad1 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.h +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.h @@ -146,6 +146,8 @@ const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) | const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask | Gfx10BlkVarSwModeMask; +const UINT_32 Gfx10Rsrc3dViewAs2dSwModeMask = Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask; + const UINT_32 Gfx10Rsrc3dThickSwModeMask = Gfx10Rsrc3dSwModeMask & ~(Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask); const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk4KBSwModeMask; diff --git a/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp b/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp index 2d9ef38513e..30f1be8b882 100644 --- a/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp +++ b/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp @@ -740,7 +740,10 @@ ChipFamily Gfx11Lib::HwlConvertChipFamily( } break; case FAMILY_GFX1150: - if (ASICREV_IS_GFX1150(chipRevision) || ASICREV_IS_GFX1151(chipRevision)) + if (false + || ASICREV_IS_GFX1150(chipRevision) + || ASICREV_IS_GFX1151(chipRevision) + ) { m_settings.isGfx1150 = 1; } @@ -1021,15 +1024,6 @@ UINT_32 Gfx11Lib::GetMetaBlkSize( // For htile surfaces, pad meta block size to 2K * num_pipes metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2); } - - const INT_32 compFragLog2 = numSamplesLog2; - - if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1)) - { - const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1); - - metablkSizeLog2 = Max(metablkSizeLog2, tmp); - } } const INT_32 metablkBitsLog2 = @@ -1110,7 +1104,6 @@ VOID Gfx11Lib::ConvertSwizzlePatternToEquation( if (IsXor(swMode) == FALSE) { - // Use simplified logic when we only have one bit-component for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) { ADDR_ASSERT(IsPow2(pSwizzle[i].value)); @@ -1140,87 +1133,479 @@ VOID Gfx11Lib::ConvertSwizzlePatternToEquation( pEquation->addr[i].valid = 1; pEquation->addr[i].index = Log2(pSwizzle[i].z); } + + pEquation->xor1[i].value = 0; + pEquation->xor2[i].value = 0; } } - else + else if (IsThin(rsrcType, swMode)) { Dim3d dim; - ComputeBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, rsrcType, swMode); + ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode); const UINT_32 blkXLog2 = Log2(dim.w); const UINT_32 blkYLog2 = Log2(dim.h); - const UINT_32 blkZLog2 = Log2(dim.d); const UINT_32 blkXMask = dim.w - 1; const UINT_32 blkYMask = dim.h - 1; - const UINT_32 blkZMask = dim.d - 1; - ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {}; - memcpy(&swizzle, pSwizzle, sizeof(swizzle)); + ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT]; UINT_32 xMask = 0; UINT_32 yMask = 0; - UINT_32 zMask = 0; + UINT_32 bMask = (1 << elemLog2) - 1; for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) { - for (UINT_32 bitComp = 0; bitComp < ADDR_MAX_EQUATION_COMP; bitComp++) + if (IsPow2(pSwizzle[i].value)) { - if (swizzle[i].value == 0) + if (pSwizzle[i].x != 0) { - ADDR_ASSERT(bitComp != 0); // Bits above element size must have at least one addr-bit - ADDR_ASSERT(bitComp <= pPatInfo->maxItemCount); - break; - } + ADDR_ASSERT((xMask & pSwizzle[i].x) == 0); + xMask |= pSwizzle[i].x; - if (swizzle[i].x != 0) - { - const UINT_32 xLog2 = BitScanForward(swizzle[i].x); - swizzle[i].x = UnsetLeastBit(swizzle[i].x); - xMask |= (1 << xLog2); + const UINT_32 xLog2 = Log2(pSwizzle[i].x); - pEquation->comps[bitComp][i].channel = 0; - pEquation->comps[bitComp][i].valid = 1; - pEquation->comps[bitComp][i].index = xLog2 + elemLog2; - } - else if (swizzle[i].y != 0) - { - const UINT_32 yLog2 = BitScanForward(swizzle[i].y); - swizzle[i].y = UnsetLeastBit(swizzle[i].y); - yMask |= (1 << yLog2); + ADDR_ASSERT(xLog2 < blkXLog2); - pEquation->comps[bitComp][i].channel = 1; - pEquation->comps[bitComp][i].valid = 1; - pEquation->comps[bitComp][i].index = yLog2; - } - else if (swizzle[i].z != 0) - { - const UINT_32 zLog2 = BitScanForward(swizzle[i].z); - swizzle[i].z = UnsetLeastBit(swizzle[i].z); - zMask |= (1 << zLog2); - - pEquation->comps[bitComp][i].channel = 2; - pEquation->comps[bitComp][i].valid = 1; - pEquation->comps[bitComp][i].index = zLog2; + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; } else { - // This function doesn't handle MSAA (must update block dims, here, and consumers) - ADDR_ASSERT_ALWAYS(); + ADDR_ASSERT(pSwizzle[i].y != 0); + ADDR_ASSERT((yMask & pSwizzle[i].y) == 0); + yMask |= pSwizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + if (pSwizzle[i].z != 0) + { + ADDR_ASSERT(IsPow2(static_cast(pSwizzle[i].z))); + + pEquation->xor2[i].channel = 2; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(pSwizzle[i].z); + } + + swizzle[i].x = pSwizzle[i].x; + swizzle[i].y = pSwizzle[i].y; + swizzle[i].z = swizzle[i].s = 0; + + ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE); + + const UINT_32 xHi = swizzle[i].x & (~blkXMask); + + if (xHi != 0) + { + ADDR_ASSERT(IsPow2(xHi)); + ADDR_ASSERT(pEquation->xor1[i].value == 0); + + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(xHi) + elemLog2; + + swizzle[i].x &= blkXMask; + } + + const UINT_32 yHi = swizzle[i].y & (~blkYMask); + + if (yHi != 0) + { + ADDR_ASSERT(IsPow2(yHi)); + + if (xHi == 0) + { + ADDR_ASSERT(pEquation->xor1[i].value == 0); + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(yHi); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(yHi); + } + + swizzle[i].y &= blkYMask; + } + + if (swizzle[i].value == 0) + { + bMask |= 1 << i; } } - ADDR_ASSERT(swizzle[i].value == 0); // We missed an xor? Are there too many? } - // We missed an address bit for coords inside the block? - // That means two coords will land on the same addr, which is bad. - ADDR_ASSERT(((xMask & blkXMask) == blkXMask) && - ((yMask & blkYMask) == blkYMask) && - ((zMask & blkZMask) == blkZMask)); - // We're sourcing from outside our block? That won't fly for PRTs, which need to be movable. - // Non-xor modes can also be used for 2D PRTs but they're handled in the simplified logic above. - ADDR_ASSERT((IsPrt(swMode) == false) || - ((xMask == blkXMask) && - (yMask == blkYMask) && - (zMask == blkZMask))); + const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1; + const UINT_32 blockMask = (1 << blockSizeLog2) - 1; + + ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask); + + while (bMask != blockMask) + { + for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++) + { + if ((bMask & (1 << i)) == 0) + { + if (IsPow2(swizzle[i].value)) + { + if (swizzle[i].x != 0) + { + ADDR_ASSERT((xMask & swizzle[i].x) == 0); + xMask |= swizzle[i].x; + + const UINT_32 xLog2 = Log2(swizzle[i].x); + + ADDR_ASSERT(xLog2 < blkXLog2); + + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; + } + else + { + ADDR_ASSERT(swizzle[i].y != 0); + ADDR_ASSERT((yMask & swizzle[i].y) == 0); + yMask |= swizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(swizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + const UINT_32 x = swizzle[i].x & xMask; + const UINT_32 y = swizzle[i].y & yMask; + + if (x != 0) + { + ADDR_ASSERT(IsPow2(x)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(x) + elemLog2; + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 0; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(x) + elemLog2; + } + } + + if (y != 0) + { + ADDR_ASSERT(IsPow2(y)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(y); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(y); + } + } + + swizzle[i].x &= ~x; + swizzle[i].y &= ~y; + } + } + } + } + + ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask)); + } + else + { + const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ? + Block256K_Log2_3d[elemLog2] : + ((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]); + + const UINT_32 blkXLog2 = blkDim.w; + const UINT_32 blkYLog2 = blkDim.h; + const UINT_32 blkZLog2 = blkDim.d; + const UINT_32 blkXMask = (1 << blkXLog2) - 1; + const UINT_32 blkYMask = (1 << blkYLog2) - 1; + const UINT_32 blkZMask = (1 << blkZLog2) - 1; + + ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {}; + UINT_32 xMask = 0; + UINT_32 yMask = 0; + UINT_32 zMask = 0; + UINT_32 bMask = (1 << elemLog2) - 1; + + for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) + { + if (IsPow2(pSwizzle[i].value)) + { + if (pSwizzle[i].x != 0) + { + ADDR_ASSERT((xMask & pSwizzle[i].x) == 0); + xMask |= pSwizzle[i].x; + + const UINT_32 xLog2 = Log2(pSwizzle[i].x); + + ADDR_ASSERT(xLog2 < blkXLog2); + + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; + } + else if (pSwizzle[i].y != 0) + { + ADDR_ASSERT((yMask & pSwizzle[i].y) == 0); + yMask |= pSwizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + else + { + ADDR_ASSERT(pSwizzle[i].z != 0); + ADDR_ASSERT((zMask & pSwizzle[i].z) == 0); + zMask |= pSwizzle[i].z; + + pEquation->addr[i].channel = 2; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].z); + + ADDR_ASSERT(pEquation->addr[i].index < blkZLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + swizzle[i].x = pSwizzle[i].x; + swizzle[i].y = pSwizzle[i].y; + swizzle[i].z = pSwizzle[i].z; + swizzle[i].s = 0; + + ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE); + + const UINT_32 xHi = swizzle[i].x & (~blkXMask); + const UINT_32 yHi = swizzle[i].y & (~blkYMask); + const UINT_32 zHi = swizzle[i].z & (~blkZMask); + + ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0)); + + if (xHi != 0) + { + ADDR_ASSERT(IsPow2(xHi)); + ADDR_ASSERT(pEquation->xor1[i].value == 0); + + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(xHi) + elemLog2; + + swizzle[i].x &= blkXMask; + } + + if (yHi != 0) + { + ADDR_ASSERT(IsPow2(yHi)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(yHi); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(yHi); + } + + swizzle[i].y &= blkYMask; + } + + if (zHi != 0) + { + ADDR_ASSERT(IsPow2(zHi)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 2; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(zHi); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 2; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(zHi); + } + + swizzle[i].z &= blkZMask; + } + + if (swizzle[i].value == 0) + { + bMask |= 1 << i; + } + } + } + + const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1; + const UINT_32 blockMask = (1 << blockSizeLog2) - 1; + + ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask); + + while (bMask != blockMask) + { + for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++) + { + if ((bMask & (1 << i)) == 0) + { + if (IsPow2(swizzle[i].value)) + { + if (swizzle[i].x != 0) + { + ADDR_ASSERT((xMask & swizzle[i].x) == 0); + xMask |= swizzle[i].x; + + const UINT_32 xLog2 = Log2(swizzle[i].x); + + ADDR_ASSERT(xLog2 < blkXLog2); + + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = xLog2 + elemLog2; + } + else if (swizzle[i].y != 0) + { + ADDR_ASSERT((yMask & swizzle[i].y) == 0); + yMask |= swizzle[i].y; + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(swizzle[i].y); + + ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); + } + else + { + ADDR_ASSERT(swizzle[i].z != 0); + ADDR_ASSERT((zMask & swizzle[i].z) == 0); + zMask |= swizzle[i].z; + + pEquation->addr[i].channel = 2; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(swizzle[i].z); + + ADDR_ASSERT(pEquation->addr[i].index < blkZLog2); + } + + swizzle[i].value = 0; + bMask |= 1 << i; + } + else + { + const UINT_32 x = swizzle[i].x & xMask; + const UINT_32 y = swizzle[i].y & yMask; + const UINT_32 z = swizzle[i].z & zMask; + + if (x != 0) + { + ADDR_ASSERT(IsPow2(x)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 0; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(x) + elemLog2; + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 0; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(x) + elemLog2; + } + } + + if (y != 0) + { + ADDR_ASSERT(IsPow2(y)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 1; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(y); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 1; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(y); + } + } + + if (z != 0) + { + ADDR_ASSERT(IsPow2(z)); + + if (pEquation->xor1[i].value == 0) + { + pEquation->xor1[i].channel = 2; + pEquation->xor1[i].valid = 1; + pEquation->xor1[i].index = Log2(z); + } + else + { + ADDR_ASSERT(pEquation->xor2[i].value == 0); + pEquation->xor2[i].channel = 2; + pEquation->xor2[i].valid = 1; + pEquation->xor2[i].index = Log2(z); + } + } + + swizzle[i].x &= ~x; + swizzle[i].y &= ~y; + swizzle[i].z &= ~z; + } + } + } + } + + ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask)); } } @@ -1255,16 +1640,28 @@ VOID Gfx11Lib::InitEquationTable() if (pPatInfo != NULL) { ADDR_ASSERT(IsValidSwMode(swMode)); - ADDR_EQUATION equation = {}; - ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); + if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex + { + ADDR_EQUATION equation = {}; - equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); + // Passing in pPatInfo to get the addr equation + ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); - m_equationTable[equationIndex] = equation; - - m_numEquations++; + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo + m_equationTable[equationIndex] = equation; + // Increment m_numEquations + m_numEquations++; + } + else // There is no equationIndex + { + // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X + ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4)); + ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D); + ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X); + } } m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex; @@ -1874,7 +2271,7 @@ BOOL_32 Gfx11Lib::ValidateSwModeParams( { if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) || (prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) || - (thin3d && ((swizzleMask & Gfx11Rsrc3dThinSwModeMask) == 0))) + (thin3d && ((swizzleMask & Gfx11Rsrc3dViewAs2dSwModeMask) == 0))) { ADDR_ASSERT_ALWAYS(); valid = FALSE; @@ -2110,7 +2507,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.view3dAs2dArray) { - allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask; + // SW_LINEAR can be used for 3D thin images, including BCn image format. + allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask; } break; @@ -2594,7 +2992,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPossibleSwizzleModes( if (pIn->flags.view3dAs2dArray) { - allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask; + // SW_LINEAR can be used for 3D thin images, including BCn image format. + allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask; } break; diff --git a/src/amd/addrlib/src/gfx11/gfx11addrlib.h b/src/amd/addrlib/src/gfx11/gfx11addrlib.h index b07c989126c..da8da6c3b1e 100644 --- a/src/amd/addrlib/src/gfx11/gfx11addrlib.h +++ b/src/amd/addrlib/src/gfx11/gfx11addrlib.h @@ -142,6 +142,8 @@ const UINT_32 Gfx11Rsrc3dThin256KBSwModeMask = (1u << ADDR_SW_256KB_Z_X) | const UINT_32 Gfx11Rsrc3dThinSwModeMask = Gfx11Rsrc3dThin64KBSwModeMask | Gfx11Rsrc3dThin256KBSwModeMask; +const UINT_32 Gfx11Rsrc3dViewAs2dSwModeMask = Gfx11Rsrc3dThinSwModeMask | Gfx11LinearSwModeMask; + const UINT_32 Gfx11Rsrc3dThickSwModeMask = Gfx11Rsrc3dSwModeMask & ~(Gfx11Rsrc3dThinSwModeMask | Gfx11LinearSwModeMask); const UINT_32 Gfx11Rsrc3dThick4KBSwModeMask = Gfx11Rsrc3dThickSwModeMask & Gfx11Blk4KBSwModeMask; diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp index 65e491ff4ad..eaedbca831c 100644 --- a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp @@ -1305,11 +1305,8 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) { ADDR_ASSERT(m_settings.isVega10 == FALSE); - ADDR_ASSERT(m_settings.isRaven == FALSE); - ADDR_ASSERT(m_settings.isVega20 == FALSE); - if (m_settings.isVega12) { m_settings.htileCacheRbConflict = 1; diff --git a/src/amd/addrlib/src/r800/siaddrlib.cpp b/src/amd/addrlib/src/r800/siaddrlib.cpp index 17aea569329..afe00ae81a3 100644 --- a/src/amd/addrlib/src/r800/siaddrlib.cpp +++ b/src/amd/addrlib/src/r800/siaddrlib.cpp @@ -1645,7 +1645,9 @@ UINT_32 SiLib::HwlGetPitchAlignmentLinear( } else { - pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); + { + pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); + } } return pitchAlign; @@ -2263,7 +2265,10 @@ BOOL_32 SiLib::DecodeGbRegs( reg.val = pRegValue->gbAddrConfig; - switch (reg.f.pipe_interleave_size) + UINT_32 pipe_interleave_size = reg.f.pipe_interleave_size; + UINT_32 row_size = reg.f.row_size; + + switch (pipe_interleave_size) { case ADDR_CONFIG_PIPE_INTERLEAVE_256B: m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; @@ -2277,7 +2282,7 @@ BOOL_32 SiLib::DecodeGbRegs( break; } - switch (reg.f.row_size) + switch (row_size) { case ADDR_CONFIG_1KB_ROW: m_rowSize = ADDR_ROWSIZE_1KB; diff --git a/src/amd/addrlib/src/r800/siaddrlib.h b/src/amd/addrlib/src/r800/siaddrlib.h index ca3f0e52169..d0dd841cebe 100644 --- a/src/amd/addrlib/src/r800/siaddrlib.h +++ b/src/amd/addrlib/src/r800/siaddrlib.h @@ -68,8 +68,11 @@ struct SiChipSettings UINT_32 isPolaris10 : 1; UINT_32 isPolaris11 : 1; UINT_32 isPolaris12 : 1; + // VI fusion UINT_32 isVegaM : 1; UINT_32 isCarrizo : 1; + + UINT_32 : 2; }; /**