amd: update addrlib

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32687>
This commit is contained in:
Marek Olšák 2024-12-13 19:25:59 -05:00 committed by Marge Bot
parent 33a73203b0
commit c0e5e8f932
37 changed files with 4957 additions and 2578 deletions

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -23,8 +23,8 @@ extern "C"
{
#endif
#define ADDRLIB_VERSION_MAJOR 9
#define ADDRLIB_VERSION_MINOR 11
#define ADDRLIB_VERSION_MAJOR 10
#define ADDRLIB_VERSION_MINOR 1
#define ADDRLIB_MAKE_VERSION(major, minor) ((major << 16) | minor)
#define ADDRLIB_VERSION ADDRLIB_MAKE_VERSION(ADDRLIB_VERSION_MAJOR, ADDRLIB_VERSION_MINOR)
@ -34,6 +34,25 @@ typedef VOID* ADDR_HANDLE;
/// Client handle used in callbacks
typedef VOID* ADDR_CLIENT_HANDLE;
/// A pair of unsigned 32-bit coordinates (x, y).
/// NOTE(review): the unit (pixels vs. elements) is not stated here — confirm at each use site.
typedef struct _ADDR_COORD2D
{
UINT_32 x; ///< X coordinate
UINT_32 y; ///< Y coordinate
} ADDR_COORD2D;
/// A triple of unsigned 32-bit coordinates (x, y, z).
/// NOTE(review): the unit (pixels vs. elements) is not stated here — confirm at each use site.
typedef struct _ADDR_COORD3D
{
UINT_32 x; ///< X coordinate
UINT_32 y; ///< Y coordinate
UINT_32 z; // also slices for 2D images
} ADDR_COORD3D;
/// A two-dimensional size expressed as unsigned 32-bit width and height.
/// NOTE(review): the unit (pixels vs. elements) is not stated here — confirm at each use site.
typedef struct _ADDR_EXTENT2D
{
UINT_32 width;  ///< Extent along X
UINT_32 height; ///< Extent along Y
} ADDR_EXTENT2D;
typedef struct _ADDR_EXTENT3D
{
UINT_32 width;
@ -1525,6 +1544,16 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
*/
UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib);
/**
****************************************************************************************************
* AddrGetInterfaceVersion
*
* @brief
* Get AddrLib interface version number (e.g. Addr2 = 2)
****************************************************************************************************
*/
UINT_32 ADDR_API AddrGetInterfaceVersion(ADDR_HANDLE hLib);
/**
****************************************************************************************************
* AddrUseTileIndex
@ -2637,6 +2666,89 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut);
/**
****************************************************************************************************
*   ADDR2_COPY_MEMSURFACE_REGION
*
*   @brief
*       Describes one region to copy. An array of these is passed as pRegions to
*       Addr2CopyMemToSurface and Addr2CopySurfaceToMem.
****************************************************************************************************
*/
typedef struct _ADDR2_COPY_MEMSURFACE_REGION
{
UINT_32 size; ///< Size of this structure in bytes
UINT_32 x; ///< Starting X coordinate, in elements
UINT_32 y; ///< Starting Y coordinate, in elements
UINT_32 slice; ///< Starting slice index or Z coordinate, in elements
UINT_32 mipId; ///< The mip ID in mip chain
ADDR_EXTENT3D copyDims; ///< Size of the region to copy, in elements
void* pMem; ///< Pointer to the memory side of the copy (presumably the source for
            ///  MemToSurface and the destination for SurfaceToMem — confirm in the
            ///  implementation)
UINT_64 memRowPitch; ///< Pitch between rows of pMem, in bytes
UINT_64 memSlicePitch; ///< Pitch between array/depth slices of pMem, in bytes
} ADDR2_COPY_MEMSURFACE_REGION;
/**
****************************************************************************************************
*   ADDR2_COPY_MEMSURFACE_INPUT
*
*   @brief
*       Describes the surface and its CPU mapping. Passed as pIn to Addr2CopyMemToSurface and
*       Addr2CopySurfaceToMem, and shared by every region in the call.
****************************************************************************************************
*/
typedef struct _ADDR2_COPY_MEMSURFACE_INPUT
{
UINT_32 size; ///< Size of this structure in bytes
AddrSwizzleMode swizzleMode; ///< Swizzle mode
AddrFormat format; ///< Format
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
AddrResourceType resourceType; ///< Surface type
UINT_32 bpp; ///< Bits per pixel
ADDR_EXTENT3D unAlignedDims; ///< Surface original dimensions (of mip0), in pixels
UINT_32 numMipLevels; ///< Total mipmap levels
UINT_32 numSamples; ///< Number of samples
UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats)
UINT_32 pbXor; ///< Xor value
void* pMappedSurface; ///< Pointer to the image surface, mapped to CPU memory
BOOL_32 singleSubres; ///< Pointer is to the base of the subresource, not to the
/// base of the surface image data. Requires:
/// - copyDims.depth == 1 (copyDims is the per-region field)
/// - all copy regions target the same mip
/// - all copy regions target the same slice/depth
} ADDR2_COPY_MEMSURFACE_INPUT;
/**
****************************************************************************************************
* Addr2CopyMemToSurface
*
* @brief
* Copy an image region from memory to an uncompressed CPU-mapped surface
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2CopyMemToSurface(
ADDR_HANDLE hLib,
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount
);
/**
****************************************************************************************************
* Addr2CopySurfaceToMem
*
* @brief
* Copy an image region from an uncompressed CPU-mapped surface to memory
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2CopySurfaceToMem(
ADDR_HANDLE hLib,
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount
);
/**
@ -3724,7 +3836,7 @@ typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT
AddrResourceType resourceType; ///< Surface type
AddrFormat format; ///< Surface format
UINT_32 width; ///< Width of mip0 in texels (not in compressed block)
UINT_32 height; ///< Height of mip0 in texels (not in compressed block)
UINT_32 height; ///< Height of mip0 in texels (not in compressed block)
UINT_32 numSlices; ///< Number surface slice/depth of mip0
UINT_32 numMipLevels; ///< Total mipmap levels.
UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation
@ -3977,6 +4089,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2GetPossibleSwizzleModes(
*
* @brief
* Return whether the swizzle mode is supported by display engine
* pResult: receives whether the given swizzle mode is displayable
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
@ -4056,23 +4169,22 @@ typedef union _ADDR3_SURFACE_FLAGS
{
struct
{
UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV
UINT_32 depth : 1; ///< This resource is a depth buffer, can be used with DSV
UINT_32 stencil : 1; ///< This resource is a stencil buffer, can be used with DSV
UINT_32 texture : 1; ///< This resource can be used with SRV
UINT_32 unordered : 1; ///< This resource can be used with UAV
UINT_32 hiZHiS : 1;
UINT_32 blockCompressed : 1;
UINT_32 nv12 : 1;
UINT_32 p010 : 1;
UINT_32 view3dAs2dArray : 1;
UINT_32 isVrsImage : 1; ///< This resource is a VRS source image
UINT_32 standardPrt : 1; ///< This resource is a PRT resource with the specific block
/// dimensions that some APIs want
UINT_32 reserved1 : 2;
UINT_32 denseSliceExact : 1; ///< Pad dimensions such that
/// Pow2Align(pitch*height, surfAlign)==pitch*height
UINT_32 qbStereo : 1; ///< Quad buffer stereo surface
UINT_32 display : 1; ///< This resource is displayable, can be used with DRV
UINT_32 reserved : 16; ///< Reserved bits
UINT_32 reserved : 18; ///< Reserved bits
};
UINT_32 value;
@ -4323,6 +4435,91 @@ ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord(
const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut);
/**
****************************************************************************************************
*   ADDR3_COPY_MEMSURFACE_REGION
*
*   @brief
*       Describes one region to copy. An array of these is passed as pRegions to
*       Addr3CopyMemToSurface and Addr3CopySurfaceToMem.
****************************************************************************************************
*/
typedef struct _ADDR3_COPY_MEMSURFACE_REGION
{
UINT_32 size; ///< Size of this structure in bytes
UINT_32 x; ///< Starting X coordinate, in elements
UINT_32 y; ///< Starting Y coordinate, in elements
UINT_32 slice; ///< Starting slice index or Z coordinate, in elements
UINT_32 mipId; ///< The mip ID in mip chain
ADDR_EXTENT3D copyDims; ///< Size of the region to copy, in elements
void* pMem; ///< Pointer to the memory side of the copy (presumably the source for
            ///  MemToSurface and the destination for SurfaceToMem — confirm in the
            ///  implementation)
UINT_64 memRowPitch; ///< Pitch between rows of pMem, in bytes
UINT_64 memSlicePitch; ///< Pitch between array/depth slices of pMem, in bytes
} ADDR3_COPY_MEMSURFACE_REGION;
/**
****************************************************************************************************
*   ADDR3_COPY_MEMSURFACE_INPUT
*
*   @brief
*       Describes the surface and its CPU mapping. Passed as pIn to Addr3CopyMemToSurface and
*       Addr3CopySurfaceToMem, and shared by every region in the call.
****************************************************************************************************
*/
typedef struct _ADDR3_COPY_MEMSURFACE_INPUT
{
UINT_32 size; ///< Size of this structure in bytes
Addr3SwizzleMode swizzleMode; ///< Swizzle mode for Gfx12
ADDR3_SURFACE_FLAGS flags; ///< Surface flags
AddrFormat format; ///< Format
AddrResourceType resourceType; ///< Surface type
UINT_32 bpp; ///< Bits per pixel
ADDR_EXTENT3D unAlignedDims; ///< Surface original dimensions (of mip0), in pixels
UINT_32 numMipLevels; ///< Total mipmap levels
UINT_32 numSamples; ///< Number of samples
UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats)
UINT_32 pbXor; ///< Xor value
void* pMappedSurface; ///< Pointer to the image surface, mapped to CPU memory
BOOL_32 singleSubres; ///< Pointer is to the base of the subresource, not to the
/// base of the surface image data. Requires:
/// - copyDims.depth == 1 (copyDims is the per-region field)
/// - all copy regions target the same mip
/// - all copy regions target the same slice/depth
} ADDR3_COPY_MEMSURFACE_INPUT;
/**
****************************************************************************************************
* Addr3CopyMemToSurface
*
* @brief
* Copy an image region from memory to an uncompressed CPU-mapped surface
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3CopyMemToSurface(
ADDR_HANDLE hLib,
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount
);
/**
****************************************************************************************************
* Addr3CopySurfaceToMem
*
* @brief
* Copy an image region from an uncompressed CPU-mapped surface to memory
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3CopySurfaceToMem(
ADDR_HANDLE hLib,
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount
);
/**
****************************************************************************************************
* ADDR3_COMPUTE_PIPEBANKXOR_INPUT

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -18,6 +18,8 @@ files_addrlib = files(
'src/core/addrlib3.h',
'src/core/addrobject.cpp',
'src/core/addrobject.h',
'src/core/addrswizzler.cpp',
'src/core/addrswizzler.h',
'src/core/coord.cpp',
'src/core/coord.h',
'src/gfx9/gfx9addrlib.cpp',
@ -54,6 +56,12 @@ else
cpp_args_addrlib += '-DBIGENDIAN_CPU'
endif
if with_mesa_ndebug
cpp_args_addrlib += '-DDEBUG=0'
else
cpp_args_addrlib += '-DDEBUG=1'
endif
cpp_args_addrlib += cpp.get_supported_arguments(
['-Wno-unused-variable', '-Wno-unused-local-typedefs',
'-Wno-unused-but-set-variable', '-Wno-maybe-uninitialized',

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -573,6 +573,32 @@ UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib)
return version;
}
/**
****************************************************************************************************
*   AddrGetInterfaceVersion
*
*   @brief
*       Report the AddrLib interface version implemented by the library behind hLib, so the
*       client knows which family of AddrN functions to call.
*
*   @return
*       The value returned by the library object's GetInterfaceVersion(), or 0 when hLib does not
*       resolve to a library object
****************************************************************************************************
*/
UINT_32 ADDR_API AddrGetInterfaceVersion(ADDR_HANDLE hLib)
{
    Addr::Lib* const pAddrLib = Lib::GetLib(hLib);

    ADDR_ASSERT(pAddrLib != NULL);

    // If the assert above does not stop execution, a missing library is reported as version 0.
    const UINT_32 interfaceVersion = (pAddrLib != NULL) ? pAddrLib->GetInterfaceVersion() : 0;

    ADDR_RESET_DEBUG_PRINTERS();

    return interfaceVersion;
}
/**
****************************************************************************************************
* AddrUseTileIndex
@ -1219,6 +1245,72 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord(
return returnCode;
}
/**
****************************************************************************************************
*   Addr2CopyMemToSurface
*
*   @brief
*       Copy image regions from memory to an uncompressed CPU-mapped surface by forwarding the
*       request to the V2 library object behind hLib.
*
*   @return
*       ADDR_ERROR when V2::Lib::GetLib(hLib) yields NULL; otherwise the ADDR_E_RETURNCODE
*       produced by the library's CopyMemToSurface
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2CopyMemToSurface(
    ADDR_HANDLE                         hLib,        ///< address lib handle
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,         ///< [in] description of image and mapping
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,    ///< [in] list of copy regions
    UINT_32                             regionCount) ///< [in] count of copy regions in list
{
    V2::Lib* const pV2Lib = V2::Lib::GetLib(hLib);

    // Start from the failure code; only a valid library object can replace it.
    ADDR_E_RETURNCODE status = ADDR_ERROR;

    if (pV2Lib != NULL)
    {
        status = pV2Lib->CopyMemToSurface(pIn, pRegions, regionCount);
    }

    return status;
}
/**
****************************************************************************************************
*   Addr2CopySurfaceToMem
*
*   @brief
*       Copy image regions from an uncompressed CPU-mapped surface to memory by forwarding the
*       request to the V2 library object behind hLib.
*
*   @return
*       ADDR_ERROR when V2::Lib::GetLib(hLib) yields NULL; otherwise the ADDR_E_RETURNCODE
*       produced by the library's CopySurfaceToMem
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2CopySurfaceToMem(
    ADDR_HANDLE                         hLib,        ///< address lib handle
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,         ///< [in] description of image and mapping
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,    ///< [in] list of copy regions
    UINT_32                             regionCount) ///< [in] count of copy regions in list
{
    V2::Lib* const pV2Lib = V2::Lib::GetLib(hLib);

    // Start from the failure code; only a valid library object can replace it.
    ADDR_E_RETURNCODE status = ADDR_ERROR;

    if (pV2Lib != NULL)
    {
        status = pV2Lib->CopySurfaceToMem(pIn, pRegions, regionCount);
    }

    return status;
}
/**
****************************************************************************************************
@ -1822,7 +1914,12 @@ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
in.swizzleMode = swizzleMode;
in.bpp = bpp;
*pResult = pLib->IsValidDisplaySwizzleMode(&in);
BOOL_32 result = pLib->IsValidDisplaySwizzleMode(&in);
if (pResult != NULL)
{
*pResult = result;
}
returnCode = ADDR_OK;
}
else
@ -2105,6 +2202,72 @@ ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord(
return returnCode;
}
/**
****************************************************************************************************
*   Addr3CopyMemToSurface
*
*   @brief
*       Copy image regions from memory to an uncompressed CPU-mapped surface by forwarding the
*       request to the V3 library object behind hLib.
*
*   @return
*       ADDR_ERROR when V3::Lib::GetLib(hLib) yields NULL; otherwise the ADDR_E_RETURNCODE
*       produced by the library's CopyMemToSurface
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3CopyMemToSurface(
    ADDR_HANDLE                         hLib,        ///< address lib handle
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,         ///< [in] description of image and mapping
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,    ///< [in] list of copy regions
    UINT_32                             regionCount) ///< [in] count of copy regions in list
{
    V3::Lib* const pV3Lib = V3::Lib::GetLib(hLib);

    // Start from the failure code; only a valid library object can replace it.
    ADDR_E_RETURNCODE status = ADDR_ERROR;

    if (pV3Lib != NULL)
    {
        status = pV3Lib->CopyMemToSurface(pIn, pRegions, regionCount);
    }

    return status;
}
/**
****************************************************************************************************
*   Addr3CopySurfaceToMem
*
*   @brief
*       Copy image regions from an uncompressed CPU-mapped surface to memory by forwarding the
*       request to the V3 library object behind hLib.
*
*   @return
*       ADDR_ERROR when V3::Lib::GetLib(hLib) yields NULL; otherwise the ADDR_E_RETURNCODE
*       produced by the library's CopySurfaceToMem
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3CopySurfaceToMem(
    ADDR_HANDLE                         hLib,        ///< address lib handle
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,         ///< [in] description of image and mapping
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,    ///< [in] list of copy regions
    UINT_32                             regionCount) ///< [in] count of copy regions in list
{
    V3::Lib* const pV3Lib = V3::Lib::GetLib(hLib);

    // Start from the failure code; only a valid library object can replace it.
    ADDR_E_RETURNCODE status = ADDR_ERROR;

    if (pV3Lib != NULL)
    {
        status = pV3Lib->CopySurfaceToMem(pIn, pRegions, regionCount);
    }

    return status;
}
/**
****************************************************************************************************
* Addr3ComputePipeBankXor

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -81,7 +81,7 @@
#define AMDGPU_VEGA10_RANGE 0x01, 0x14 //# 1 <= x < 20
#define AMDGPU_VEGA12_RANGE 0x14, 0x28 //# 20 <= x < 40
#define AMDGPU_VEGA20_RANGE 0x28, 0x32 //# 40 <= x < max
#define AMDGPU_VEGA20_RANGE 0x28, 0xFF //# 40 <= x < max
#define AMDGPU_RAVEN_RANGE 0x01, 0x81 //# 1 <= x < 129
#define AMDGPU_RAVEN2_RANGE 0x81, 0x90 //# 129 <= x < 144
@ -183,8 +183,7 @@
#define ASICREV_IS_GFX1151(r) ASICREV_IS(r, GFX1151)
#define ASICREV_IS_GFX1152(r) ASICREV_IS(r, GFX1152)
#define ASICREV_IS_GFX1153(r) ASICREV_IS(r, GFX1153)
#define ASICREV_IS_PHOENIX1(r) ASICREV_IS(r, PHOENIX1)
#define ASICREV_IS_PHOENIX(r) ASICREV_IS(r, PHOENIX)
#define ASICREV_IS_PHOENIX2(r) ASICREV_IS(r, PHOENIX2)
#define ASICREV_IS_HAWK_POINT1(r) ASICREV_IS(r, HAWK_POINT1)
#define ASICREV_IS_HAWK_POINT2(r) ASICREV_IS(r, HAWK_POINT2)

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -17,6 +17,7 @@
#define __ADDR_COMMON_H__
#include "addrinterface.h"
#include <stdint.h>
#if !defined(__APPLE__) || defined(HAVE_TSERVER)
@ -26,7 +27,6 @@
#if defined(__GNUC__)
#include <signal.h>
#include <assert.h>
#endif
#if defined(_WIN32)
@ -36,17 +36,9 @@
////////////////////////////////////////////////////////////////////////////////////////////////////
// Platform specific debug break defines
////////////////////////////////////////////////////////////////////////////////////////////////////
#if !defined(DEBUG)
#ifdef NDEBUG
#define DEBUG 0
#else
#define DEBUG 1
#endif
#endif
#if DEBUG
#if defined(__GNUC__)
#define ADDR_DBG_BREAK() { assert(false); }
#define ADDR_DBG_BREAK() { raise(SIGTRAP); }
#elif defined(__APPLE__)
#define ADDR_DBG_BREAK() { IOPanic("");}
#else
@ -191,21 +183,21 @@ do { if (!(cond)) \
////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(static_assert)
#if 1
#define ADDR_C_ASSERT(__e) static_assert(__e, "")
#else
/* This version of STATIC_ASSERT() relies on VLAs. If COND is
* false/zero, the array size will be -1 and we'll get a compile
* error
*/
# define ADDR_C_ASSERT(__e) do { \
(void) sizeof(char [1 - 2*!(__e)]); \
} while (0)
#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1]
#endif
namespace Addr
{
////////////////////////////////////////////////////////////////////////////////////////////////////
// Common constants
////////////////////////////////////////////////////////////////////////////////////////////////////
static const UINT_32 MaxElementBytesLog2 = 5; ///< Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
namespace V1
{
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -382,6 +374,36 @@ static inline UINT_32 BitScanForward(
return out;
}
/**
****************************************************************************************************
*   BitScanReverse
*
*   @brief
*       Returns the reverse-position of the most-significant '1' bit, i.e. the number of zero
*       bits above it in a 32-bit value (0 when bit 31 is set, 31 when only bit 0 is set).
*       The mask must not be 0.
*
*   @return
*       Leading-zero count of mask, in the range [0, 31]
****************************************************************************************************
*/
static inline UINT_32 BitScanReverse(
    UINT_32 mask) ///< [in] Bitmask to scan
{
    ADDR_ASSERT(mask > 0);
    unsigned long out = 0;
#if (defined(_WIN32) || defined(_WIN64))
    // The intrinsic yields the index of the highest set bit; xor with 31 turns that into the
    // distance from bit 31.
    ::_BitScanReverse(&out, mask);
    out ^= 31;
#elif defined(__GNUC__)
    out = __builtin_clz(mask);
#else
    // Portable fallback: count the significant bits, then subtract from the bit width.
    // The counter must start at 0; starting at 32 made the subtraction below underflow.
    out = 0;
    while (mask != 0)
    {
        mask >>= 1;
        out++;
    }
    out = sizeof(mask) * 8 - out;
#endif
    return out;
}
/**
****************************************************************************************************
* IsPow2
@ -414,10 +436,10 @@ static inline UINT_64 IsPow2(
/**
****************************************************************************************************
* ByteAlign
* PowTwoAlign
*
* @brief
* Align UINT_32 "x" to "align" alignment, "align" should be power of 2
* Align UINT_32 "x" up to "align" alignment, "align" should be power of 2
****************************************************************************************************
*/
static inline UINT_32 PowTwoAlign(
@ -433,10 +455,10 @@ static inline UINT_32 PowTwoAlign(
/**
****************************************************************************************************
* ByteAlign
* PowTwoAlign
*
* @brief
* Align UINT_64 "x" to "align" alignment, "align" should be power of 2
* Align UINT_64 "x" up to "align" alignment, "align" should be power of 2
****************************************************************************************************
*/
static inline UINT_64 PowTwoAlign(
@ -450,6 +472,44 @@ static inline UINT_64 PowTwoAlign(
return (x + (align - 1)) & (~(align - 1));
}
/**
****************************************************************************************************
*   PowTwoAlignDown
*
*   @brief
*       Round UINT_32 "x" down to a multiple of "align"; "align" should be a power of 2
****************************************************************************************************
*/
static inline UINT_32 PowTwoAlignDown(
    UINT_32 x,
    UINT_32 align)
{
    // The masking below only works when align is a power of two.
    ADDR_ASSERT(IsPow2(align));

    const UINT_32 lowBits = align - 1;

    return x & ~lowBits;
}
/**
****************************************************************************************************
*   PowTwoAlignDown
*
*   @brief
*       Round UINT_64 "x" down to a multiple of "align"; "align" should be a power of 2
****************************************************************************************************
*/
static inline UINT_64 PowTwoAlignDown(
    UINT_64 x,
    UINT_64 align)
{
    // The masking below only works when align is a power of two.
    ADDR_ASSERT(IsPow2(align));

    const UINT_64 lowBits = align - 1;

    return x & ~lowBits;
}
/**
****************************************************************************************************
* Min
@ -571,44 +631,18 @@ static inline UINT_32 NextPow2(
return newDim;
}
/**
****************************************************************************************************
* Log2NonPow2
*
* @brief
* Compute log of base 2 no matter the target is power of 2 or not
****************************************************************************************************
*/
static inline UINT_32 Log2NonPow2(
UINT_32 x) ///< [in] the value should calculate log based 2
{
UINT_32 y;
y = 0;
while (x > 1)
{
x >>= 1;
y++;
}
return y;
}
/**
****************************************************************************************************
* Log2
*
* @brief
* Compute log of base 2
* Compute log of base 2 no matter the target is power of 2 or not. Returns 0 if 0.
****************************************************************************************************
*/
static inline UINT_32 Log2(
UINT_32 x) ///< [in] the value should calculate log based 2
{
// Assert that x is a power of two.
ADDR_ASSERT(IsPow2(x));
return Log2NonPow2(x);
return (x != 0) ? (31 ^ BitScanReverse(x)) : 0;
}
/**
@ -1081,6 +1115,72 @@ static inline UINT_32 ShiftRight(
return Max(a >> b, 1u);
}
/**
****************************************************************************************************
*   VoidPtrDec
*
*   @brief
*       Subtracts a byte offset from the given pointer.
****************************************************************************************************
*/
static inline void* VoidPtrDec(
    void*  pIn,
    size_t offset)
{
    // Step through a char pointer so the offset is counted in bytes.
    char* const pBytes = (char*)pIn;

    return (void*)(pBytes - offset);
}
/// Const-pointer variant: subtracts a byte offset from the given pointer.
static inline const void* VoidPtrDec(
    const void* pIn,
    size_t      offset)
{
    // Step through a char pointer so the offset is counted in bytes.
    const char* const pBytes = (const char*)pIn;

    return (const void*)(pBytes - offset);
}
/**
****************************************************************************************************
*   VoidPtrInc
*
*   @brief
*       Adds a byte offset to the given pointer.
****************************************************************************************************
*/
static inline void* VoidPtrInc(
    void*  pIn,
    size_t offset)
{
    // Step through a char pointer so the offset is counted in bytes.
    char* const pBytes = (char*)pIn;

    return (void*)(pBytes + offset);
}
/// Const-pointer variant: adds a byte offset to the given pointer.
static inline const void* VoidPtrInc(
    const void* pIn,
    size_t      offset)
{
    // Step through a char pointer so the offset is counted in bytes.
    const char* const pBytes = (const char*)pIn;

    return (const void*)(pBytes + offset);
}
/**
****************************************************************************************************
*   VoidPtrXor
*
*   @brief
*       Xors a value into the integer representation of the given pointer.
****************************************************************************************************
*/
static inline void* VoidPtrXor(
    void*  pIn,
    size_t offset)
{
    // Xor is not defined on pointers, so go through the integer form of the address.
    const uintptr_t address = (uintptr_t)pIn;

    return (void*)(address ^ offset);
}
/// Const-pointer variant: xors a value into the integer representation of the given pointer.
static inline const void* VoidPtrXor(
    const void* pIn,
    size_t      offset)
{
    // Xor is not defined on pointers, so go through the integer form of the address.
    const uintptr_t address = (uintptr_t)pIn;

    return (const void*)(address ^ offset);
}
} // Addr
#endif // __ADDR_COMMON_H__

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -668,4 +668,105 @@ UINT_32 Lib::GetBpe(AddrFormat format) const
return GetElemLib()->GetBitsPerPixel(format);
}
/**
************************************************************************************************************************
*   Lib::ComputeOffsetFromSwizzlePattern
*
*   @brief
*       Compute offset from swizzle pattern. Each pattern entry holds four 16-bit masks (x, y, z, s).
*       Output bit i is the XOR of every coordinate bit selected by entry i's masks.
*
*   @return
*       Offset
************************************************************************************************************************
*/
UINT_32 Lib::ComputeOffsetFromSwizzlePattern(
    const UINT_64* pPattern,    ///< Swizzle pattern
    UINT_32        numBits,     ///< Number of bits in pattern
    UINT_32        x,           ///< x coord in pixel
    UINT_32        y,           ///< y coord in pixel
    UINT_32        z,           ///< z coord in slice
    UINT_32        s            ///< sample id
    )
{
    const ADDR_BIT_SETTING* const pBitSettings = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
    const UINT_32                 coords[4]    = { x, y, z, s };

    UINT_32 result = 0;

    for (UINT_32 bit = 0; bit < numBits; bit++)
    {
        const UINT_16 masks[4] =
        {
            pBitSettings[bit].x,
            pBitSettings[bit].y,
            pBitSettings[bit].z,
            pBitSettings[bit].s,
        };

        UINT_32 parity = 0;

        for (UINT_32 c = 0; c < 4; c++)
        {
            // Only the coordinate bits picked by the 16-bit mask take part. Each pass clears the
            // lowest remaining selected bit and flips the parity once for it.
            for (UINT_32 selected = coords[c] & masks[c]; selected != 0; selected &= (selected - 1))
            {
                parity ^= 1;
            }
        }

        result |= (parity << bit);
    }

    return result;
}
} // Addr

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -214,6 +214,73 @@ enum ShaderEngineTileSizeConfig
ADDR_CONFIG_SE_TILE_32 = 0x00000001,
};
/**
************************************************************************************************************************
* @brief Bit setting for swizzle pattern
*
* One entry describes a single output bit of a swizzled offset. Lib::ComputeOffsetFromSwizzlePattern
* XORs together every coordinate bit selected by the four masks below to produce that bit.
************************************************************************************************************************
*/
union ADDR_BIT_SETTING
{
struct
{
UINT_16 x; ///< Mask of x-coordinate bits that feed this output bit
UINT_16 y; ///< Mask of y-coordinate bits that feed this output bit
UINT_16 z; ///< Mask of z-coordinate bits that feed this output bit
UINT_16 s; ///< Mask of sample-index bits that feed this output bit
};
UINT_64 value; ///< The four masks viewed as one 64-bit word
};
/**
************************************************************************************************************************
*   InitBit
*
*   @brief
*       Initialize bit setting value via a return value. Produces a 64-bit value with exactly one
*       bit set: bit "index" of the 16-bit field number "c", i.e. bit (c * 16 + index).
*
*   @note
*       Both arguments are parenthesized in the expansion so that expressions such as
*       InitBit(a | b, cond ? 2 : 3) keep their intended grouping.
************************************************************************************************************************
*/
#define InitBit(c, index) (1ull << (((c) << 4) + (index)))
// Single-bit swizzle-pattern values built with InitBit(field, bit).
// NOTE(review): field 0/1/2/3 presumably lines up with the x/y/z/s members of ADDR_BIT_SETTING when
// the value is viewed through that union on a little-endian target — confirm.

// Field 0, bits 0..11
const UINT_64 X0 = InitBit(0, 0);
const UINT_64 X1 = InitBit(0, 1);
const UINT_64 X2 = InitBit(0, 2);
const UINT_64 X3 = InitBit(0, 3);
const UINT_64 X4 = InitBit(0, 4);
const UINT_64 X5 = InitBit(0, 5);
const UINT_64 X6 = InitBit(0, 6);
const UINT_64 X7 = InitBit(0, 7);
const UINT_64 X8 = InitBit(0, 8);
const UINT_64 X9 = InitBit(0, 9);
const UINT_64 X10 = InitBit(0, 10);
const UINT_64 X11 = InitBit(0, 11);
// Field 1, bits 0..11
const UINT_64 Y0 = InitBit(1, 0);
const UINT_64 Y1 = InitBit(1, 1);
const UINT_64 Y2 = InitBit(1, 2);
const UINT_64 Y3 = InitBit(1, 3);
const UINT_64 Y4 = InitBit(1, 4);
const UINT_64 Y5 = InitBit(1, 5);
const UINT_64 Y6 = InitBit(1, 6);
const UINT_64 Y7 = InitBit(1, 7);
const UINT_64 Y8 = InitBit(1, 8);
const UINT_64 Y9 = InitBit(1, 9);
const UINT_64 Y10 = InitBit(1, 10);
const UINT_64 Y11 = InitBit(1, 11);
// Field 2, bits 0..8
const UINT_64 Z0 = InitBit(2, 0);
const UINT_64 Z1 = InitBit(2, 1);
const UINT_64 Z2 = InitBit(2, 2);
const UINT_64 Z3 = InitBit(2, 3);
const UINT_64 Z4 = InitBit(2, 4);
const UINT_64 Z5 = InitBit(2, 5);
const UINT_64 Z6 = InitBit(2, 6);
const UINT_64 Z7 = InitBit(2, 7);
const UINT_64 Z8 = InitBit(2, 8);
// Field 3, bits 0..2
const UINT_64 S0 = InitBit(3, 0);
const UINT_64 S1 = InitBit(3, 1);
const UINT_64 S2 = InitBit(3, 2);
/**
****************************************************************************************************
* @brief This class contains asic independent address lib functionalities
@ -234,6 +301,9 @@ public:
}
static Lib* GetLib(ADDR_HANDLE hLib);
/// Returns which version of addrlib functions should be used.
virtual UINT_32 GetInterfaceVersion() const = 0;
/// Returns AddrLib version (from the compiled binary instead of the include file)
UINT_32 GetVersion()
@ -263,6 +333,15 @@ public:
UINT_32 GetBpe(AddrFormat format) const;
static UINT_32 ComputeOffsetFromSwizzlePattern(
const UINT_64* pPattern,
UINT_32 numBits,
UINT_32 x,
UINT_32 y,
UINT_32 z,
UINT_32 s);
protected:
Lib(); // Constructor is protected
Lib(const Client* pClient);

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -70,6 +70,11 @@ public:
static Lib* GetLib(
ADDR_HANDLE hLib);
/// Returns which version of addrlib functions should be used with this library object; this class reports 1.
virtual UINT_32 GetInterfaceVersion() const
{
return 1;
}
/// Returns tileIndex support
BOOL_32 UseTileIndex(INT_32 index) const
{

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -382,6 +382,238 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
return returnCode;
}
/**
************************************************************************************************************************
*   Lib::CopyLinearSurface
*
*   @brief
*       Implements uncompressed linear copies between memory and images.
*
*       Surface layout (per-mip offset and pitch, slice size) is obtained from ComputeSurfaceInfo; every region is
*       then copied row by row with memcpy. Multisampled surfaces, mip counts larger than MaxMipLevels and regions
*       whose mipId is outside the surface's mip chain are rejected before any data is copied.
*
*   @param pIn          Description of the mapped surface
*   @param pRegions     Array of regionCount regions to copy
*   @param regionCount  Number of entries in pRegions
*   @param surfaceIsDst true: copy from each region's pMem into the surface; false: surface into pMem
*
*   @return
*       ADDR_E_RETURNCODE (ADDR_INVALIDPARAMS for rejected input, otherwise the ComputeSurfaceInfo result)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::CopyLinearSurface(
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount,
    bool                                surfaceIsDst) const
{
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn               = {0};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut              = {0};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};

    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // mipInfo only has MaxMipLevels entries, so enforce the limit at runtime instead of relying on the assert alone.
    if ((pIn->numSamples > 1) || (pIn->numMipLevels > MaxMipLevels))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    // A caller-provided pitch is only forwarded for surfaces without a mip chain.
    if (localIn.numMipLevels <= 1)
    {
        localIn.pitchInElement = pIn->pitchInElement;
    }

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        // Validate every region up front so that a bad region can neither index past the computed mip info nor
        // leave a partially completed copy behind.
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            if (pRegions[regionIdx].mipId >= localIn.numMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    if (returnCode == ADDR_OK)
    {
        // Widen to size_t once so the byte-offset products below are not computed in 32 bits.
        const size_t elementBytes = localIn.bpp >> 3;

        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];

            // With singleSubres the mapped pointer is used as the subresource base, so no mip offset is applied.
            void* pMipBase = VoidPtrInc(pIn->pMappedSurface,
                                        (pIn->singleSubres ? 0 : mipInfo[pCurRegion->mipId].offset));

            const size_t lineSizeBytes     = elementBytes * pCurRegion->copyDims.width;
            const size_t lineImgPitchBytes = elementBytes * mipInfo[pCurRegion->mipId].pitch;

            for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
            {
                UINT_32 sliceCoord     = sliceIdx + pCurRegion->slice;
                size_t  imgOffsetInMip = (localOut.sliceSize * sliceCoord) +
                                         (lineImgPitchBytes * pCurRegion->y) +
                                         (elementBytes * pCurRegion->x);
                size_t  memOffset      = static_cast<size_t>(sliceIdx) * pCurRegion->memSlicePitch;

                for (UINT_32 yIdx = 0; yIdx < pCurRegion->copyDims.height; yIdx++)
                {
                    if (surfaceIsDst)
                    {
                        memcpy(VoidPtrInc(pMipBase, imgOffsetInMip),
                               VoidPtrInc(pCurRegion->pMem, memOffset),
                               lineSizeBytes);
                    }
                    else
                    {
                        memcpy(VoidPtrInc(pCurRegion->pMem, memOffset),
                               VoidPtrInc(pMipBase, imgOffsetInMip),
                               lineSizeBytes);
                    }

                    // Advance one row in the image and one row in the client memory.
                    imgOffsetInMip += lineImgPitchBytes;
                    memOffset      += pCurRegion->memRowPitch;
                }
            }
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
*   Lib::CopyMemToSurface
*
*   @brief
*       Interface function stub of Addr2CopyMemToSurface. Checks the region list (and, when size-field filling is
*       enabled, the structure sizes and single-subresource constraints), then hands linear surfaces to
*       CopyLinearSurface and every other swizzle mode to HwlCopyMemToSurface.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::CopyMemToSurface(
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount) const
{
    ADDR_E_RETURNCODE status = ADDR_OK;

    if ((pRegions == NULL) || (regionCount == 0))
    {
        status = ADDR_INVALIDPARAMS;
    }
    else if (GetFillSizeFieldsFlags() == TRUE)
    {
        if (pIn->size == sizeof(ADDR2_COPY_MEMSURFACE_INPUT))
        {
            const BOOL_32 oneSubres  = pIn->singleSubres;
            const UINT_32 firstSlice = pRegions[0].slice;
            const UINT_32 firstMip   = pRegions[0].mipId;

            // Stop at the first region that fails either check.
            for (UINT_32 idx = 0; (idx < regionCount) && (status == ADDR_OK); idx++)
            {
                const ADDR2_COPY_MEMSURFACE_REGION* pRegion = &pRegions[idx];

                if (pRegion->size != sizeof(ADDR2_COPY_MEMSURFACE_REGION))
                {
                    status = ADDR_INVALIDPARAMS;
                }
                else if (oneSubres &&
                         ((pRegion->copyDims.depth != 1) ||
                          (pRegion->slice != firstSlice) ||
                          (pRegion->mipId != firstMip)))
                {
                    // The copy would span several (or interleaved) subresources, which a pointer mapped to a
                    // single subresource cannot describe.
                    status = ADDR_INVALIDPARAMS;
                }
            }
        }
        else
        {
            status = ADDR_INVALIDPARAMS;
        }
    }

    if (status == ADDR_OK)
    {
        status = IsLinear(pIn->swizzleMode) ? CopyLinearSurface(pIn, pRegions, regionCount, true)
                                            : HwlCopyMemToSurface(pIn, pRegions, regionCount);
    }

    return status;
}
/**
************************************************************************************************************************
*   Lib::CopySurfaceToMem
*
*   @brief
*       Interface function stub of Addr2CopySurfaceToMem. Rejects an empty or NULL region list, validates structure
*       sizes and single-subresource constraints when size-field filling is enabled, then hands linear surfaces to
*       CopyLinearSurface and every other swizzle mode to HwlCopySurfaceToMem.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::CopySurfaceToMem(
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // pRegions[0] is dereferenced below, so a NULL array must be rejected as well as an empty one
    // (the same guard CopyMemToSurface applies).
    if ((regionCount == 0) || (pRegions == NULL))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }
    else if (GetFillSizeFieldsFlags() == TRUE)
    {
        if (pIn->size != sizeof(ADDR2_COPY_MEMSURFACE_INPUT))
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            UINT_32 baseSlice    = pRegions[0].slice;
            UINT_32 baseMip      = pRegions[0].mipId;
            BOOL_32 singleSubres = pIn->singleSubres;

            for (UINT_32 i = 0; i < regionCount; i++)
            {
                if (pRegions[i].size != sizeof(ADDR2_COPY_MEMSURFACE_REGION))
                {
                    returnCode = ADDR_INVALIDPARAMS;
                    break;
                }

                if (singleSubres &&
                    ((pRegions[i].copyDims.depth != 1) ||
                     (pRegions[i].slice != baseSlice) ||
                     (pRegions[i].mipId != baseMip)))
                {
                    // Copy will cover multiple/interleaved subresources, a
                    // mapped pointer to a single subres cannot be valid.
                    returnCode = ADDR_INVALIDPARAMS;
                    break;
                }
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        if (IsLinear(pIn->swizzleMode))
        {
            // surfaceIsDst = false: the surface is the copy source.
            returnCode = CopyLinearSurface(pIn, pRegions, regionCount, false);
        }
        else
        {
            returnCode = HwlCopySurfaceToMem(pIn, pRegions, regionCount);
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeSurfaceCoordFromAddr

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -74,23 +74,6 @@ struct Dim3d
UINT_32 d;
};
// Macro define resource block type
enum AddrBlockType
{
AddrBlockLinear = 0, // Resource uses linear swizzle mode
AddrBlockMicro = 1, // Resource uses 256B block
AddrBlockThin4KB = 2, // Resource uses thin 4KB block
AddrBlockThick4KB = 3, // Resource uses thick 4KB block
AddrBlockThin64KB = 4, // Resource uses thin 64KB block
AddrBlockThick64KB = 5, // Resource uses thick 64KB block
AddrBlockThinVar = 6, // Resource uses thin var block
AddrBlockThickVar = 7, // Resource uses thick var block
AddrBlockMaxTiledType,
AddrBlockThin256KB = AddrBlockThinVar,
AddrBlockThick256KB = AddrBlockThickVar,
};
enum AddrSwSet
{
AddrSwSetZ = 1 << ADDR_SW_Z,
@ -109,23 +92,6 @@ const UINT_32 Log2Size256 = 8u;
const UINT_32 Log2Size4K = 12u;
const UINT_32 Log2Size64K = 16u;
/**
************************************************************************************************************************
* @brief Bit setting for swizzle pattern
************************************************************************************************************************
*/
union ADDR_BIT_SETTING
{
struct
{
UINT_16 x;
UINT_16 y;
UINT_16 z;
UINT_16 s;
};
UINT_64 value;
};
/**
************************************************************************************************************************
* @brief Swizzle pattern information
@ -142,55 +108,6 @@ struct ADDR_SW_PATINFO
UINT_8 nibble4Idx;
};
/**
************************************************************************************************************************
* InitBit
*
* @brief
* Initialize bit setting value via a return value
************************************************************************************************************************
*/
#define InitBit(c, index) (1ull << ((c << 4) + index))
const UINT_64 X0 = InitBit(0, 0);
const UINT_64 X1 = InitBit(0, 1);
const UINT_64 X2 = InitBit(0, 2);
const UINT_64 X3 = InitBit(0, 3);
const UINT_64 X4 = InitBit(0, 4);
const UINT_64 X5 = InitBit(0, 5);
const UINT_64 X6 = InitBit(0, 6);
const UINT_64 X7 = InitBit(0, 7);
const UINT_64 X8 = InitBit(0, 8);
const UINT_64 X9 = InitBit(0, 9);
const UINT_64 X10 = InitBit(0, 10);
const UINT_64 X11 = InitBit(0, 11);
const UINT_64 Y0 = InitBit(1, 0);
const UINT_64 Y1 = InitBit(1, 1);
const UINT_64 Y2 = InitBit(1, 2);
const UINT_64 Y3 = InitBit(1, 3);
const UINT_64 Y4 = InitBit(1, 4);
const UINT_64 Y5 = InitBit(1, 5);
const UINT_64 Y6 = InitBit(1, 6);
const UINT_64 Y7 = InitBit(1, 7);
const UINT_64 Y8 = InitBit(1, 8);
const UINT_64 Y9 = InitBit(1, 9);
const UINT_64 Y10 = InitBit(1, 10);
const UINT_64 Y11 = InitBit(1, 11);
const UINT_64 Z0 = InitBit(2, 0);
const UINT_64 Z1 = InitBit(2, 1);
const UINT_64 Z2 = InitBit(2, 2);
const UINT_64 Z3 = InitBit(2, 3);
const UINT_64 Z4 = InitBit(2, 4);
const UINT_64 Z5 = InitBit(2, 5);
const UINT_64 Z6 = InitBit(2, 6);
const UINT_64 Z7 = InitBit(2, 7);
const UINT_64 Z8 = InitBit(2, 8);
const UINT_64 S0 = InitBit(3, 0);
const UINT_64 S1 = InitBit(3, 1);
const UINT_64 S2 = InitBit(3, 2);
/**
************************************************************************************************************************
@ -205,6 +122,10 @@ public:
static Lib* GetLib(
ADDR_HANDLE hLib);
/// Returns which version of addrlib functions should be used with this library object; this class reports 2.
virtual UINT_32 GetInterfaceVersion() const
{
return 2;
}
//
// Interface stubs
//
@ -221,6 +142,16 @@ public:
ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
ADDR_E_RETURNCODE CopyMemToSurface(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
ADDR_E_RETURNCODE CopySurfaceToMem(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
// For HTile
ADDR_E_RETURNCODE ComputeHtileInfo(
@ -473,7 +404,7 @@ protected:
sample = (sample == 0) ? 1 : sample;
frag = (frag == 0) ? sample : frag;
UINT_32 fmaskBpp = QLog2(frag);
UINT_32 fmaskBpp = Log2(frag);
if (sample > frag)
{
@ -725,6 +656,24 @@ protected:
return ADDR_NOTIMPLEMENTED;
}
/// Default hardware-layer hook for memory-to-surface copies; CopyMemToSurface calls it for non-linear swizzle
/// modes. This base version performs no copy: it invokes ADDR_NOT_IMPLEMENTED() and returns ADDR_NOTSUPPORTED.
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
/// Default hardware-layer hook for surface-to-memory copies; CopySurfaceToMem calls it for non-linear swizzle
/// modes. This base version performs no copy: it invokes ADDR_NOT_IMPLEMENTED() and returns ADDR_NOTSUPPORTED.
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
ADDR_E_RETURNCODE ComputeBlock256Equation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
@ -754,6 +703,12 @@ protected:
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE CopyLinearSurface(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount,
bool surfaceIsDst) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
@ -892,13 +847,13 @@ protected:
{
case ADDR_RSRC_TEX_3D:
// Fall through to share 2D case
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1);
actualMipLevels = Max(actualMipLevels, Log2(pIn->numSlices) + 1);
case ADDR_RSRC_TEX_2D:
// Fall through to share 1D case
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1);
actualMipLevels = Max(actualMipLevels, Log2(pIn->height) + 1);
case ADDR_RSRC_TEX_1D:
// Base 1D case
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1);
actualMipLevels = Max(actualMipLevels, Log2(pIn->width) + 1);
break;
default:
ADDR_ASSERT_ALWAYS();
@ -976,8 +931,6 @@ protected:
static const UINT_32 MaxSwModeType = 32;
// Max number of resource type (2D/3D) supported for equation
static const UINT_32 MaxRsrcType = 2;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Almost all swizzle mode + resource type support equation
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwModeType * MaxRsrcType;
// Equation table

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -82,7 +82,7 @@ void Lib::Init()
// There is no equation table entry for linear, so start at the "next" swizzle mode entry.
for (UINT_32 swizzleModeIdx = ADDR3_LINEAR + 1; swizzleModeIdx < ADDR3_MAX_TYPE; swizzleModeIdx++)
{
for (UINT_32 msaaRateIdx = 0; msaaRateIdx < MaxMsaaRateLog2; msaaRateIdx++)
for (UINT_32 msaaRateIdx = 0; msaaRateIdx < MaxNumMsaaRates; msaaRateIdx++)
{
for (UINT_32 log2BytesIdx = 0; log2BytesIdx < MaxElementBytesLog2; log2BytesIdx++)
{
@ -276,6 +276,11 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
}
}
if (returnCode == ADDR_OK)
{
returnCode = ComputeSurfaceInfoSanityCheck(&localIn);
}
if (returnCode == ADDR_OK)
{
returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
@ -480,6 +485,242 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
return returnCode;
}
/**
************************************************************************************************************************
*   Lib::CopyLinearSurface
*
*   @brief
*       Implements uncompressed linear copies between memory and images.
*
*       Surface layout (per-mip offset and pitch, slice size) is obtained from ComputeSurfaceInfo with the swizzle
*       mode forced to ADDR3_LINEAR; every region is then copied row by row with memcpy. Multisampled surfaces, mip
*       counts larger than Addr3MaxMipLevels and regions whose mipId is outside the surface's mip chain are rejected
*       before any data is copied.
*
*   @param pIn          Description of the mapped surface
*   @param pRegions     Array of regionCount regions to copy
*   @param regionCount  Number of entries in pRegions
*   @param surfaceIsDst true: copy from each region's pMem into the surface; false: surface into pMem
*
*   @return
*       ADDR_E_RETURNCODE (ADDR_INVALIDPARAMS for rejected input, otherwise the ComputeSurfaceInfo result)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::CopyLinearSurface(
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount,
    bool                                surfaceIsDst) const
{
    ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn                    = {0};
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut                   = {0};
    ADDR3_MIP_INFO                    mipInfo[Addr3MaxMipLevels] = {{0}};

    ADDR_ASSERT(pIn->numMipLevels <= Addr3MaxMipLevels);

    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // mipInfo only has Addr3MaxMipLevels entries, so enforce the limit at runtime instead of relying on the
    // assert alone.
    if ((pIn->numSamples > 1) || (pIn->numMipLevels > Addr3MaxMipLevels))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = ADDR3_LINEAR;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    // A caller-provided pitch is only forwarded for surfaces without a mip chain.
    if (localIn.numMipLevels <= 1)
    {
        localIn.pitchInElement = pIn->pitchInElement;
    }

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        // Validate every region up front so that a bad region can neither index past the computed mip info nor
        // leave a partially completed copy behind.
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            if (pRegions[regionIdx].mipId >= localIn.numMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    if (returnCode == ADDR_OK)
    {
        // Widen to size_t once so the byte-offset products below are not computed in 32 bits.
        const size_t elementBytes = localIn.bpp >> 3;

        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];

            // With singleSubres the mapped pointer is used as the subresource base, so no mip offset is applied.
            void* pMipBase = VoidPtrInc(pIn->pMappedSurface,
                                        (pIn->singleSubres ? 0 : mipInfo[pCurRegion->mipId].offset));

            const size_t lineSizeBytes     = elementBytes * pCurRegion->copyDims.width;
            const size_t lineImgPitchBytes = elementBytes * mipInfo[pCurRegion->mipId].pitch;

            for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
            {
                UINT_32 sliceCoord     = sliceIdx + pCurRegion->slice;
                size_t  imgOffsetInMip = (localOut.sliceSize * sliceCoord) +
                                         (lineImgPitchBytes * pCurRegion->y) +
                                         (elementBytes * pCurRegion->x);
                size_t  memOffset      = static_cast<size_t>(sliceIdx) * pCurRegion->memSlicePitch;

                for (UINT_32 yIdx = 0; yIdx < pCurRegion->copyDims.height; yIdx++)
                {
                    if (surfaceIsDst)
                    {
                        memcpy(VoidPtrInc(pMipBase, imgOffsetInMip),
                               VoidPtrInc(pCurRegion->pMem, memOffset),
                               lineSizeBytes);
                    }
                    else
                    {
                        memcpy(VoidPtrInc(pCurRegion->pMem, memOffset),
                               VoidPtrInc(pMipBase, imgOffsetInMip),
                               lineSizeBytes);
                    }

                    // Advance one row in the image and one row in the client memory.
                    imgOffsetInMip += lineImgPitchBytes;
                    memOffset      += pCurRegion->memRowPitch;
                }
            }
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
*   Lib::CopyMemToSurface
*
*   @brief
*       Interface function stub of Addr3CopyMemToSurface. Checks the region list (and, when size-field filling is
*       enabled, the structure sizes and single-subresource constraints), then hands linear surfaces to
*       CopyLinearSurface and every other swizzle mode to HwlCopyMemToSurface.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::CopyMemToSurface(
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount) const
{
    ADDR_E_RETURNCODE status = ADDR_OK;

    if ((pRegions == NULL) || (regionCount == 0))
    {
        status = ADDR_INVALIDPARAMS;
    }
    else if (GetFillSizeFieldsFlags() == TRUE)
    {
        if (pIn->size == sizeof(ADDR3_COPY_MEMSURFACE_INPUT))
        {
            const BOOL_32 oneSubres  = pIn->singleSubres;
            const UINT_32 firstSlice = pRegions[0].slice;
            const UINT_32 firstMip   = pRegions[0].mipId;

            // Stop at the first region that fails either check.
            for (UINT_32 idx = 0; (idx < regionCount) && (status == ADDR_OK); idx++)
            {
                const ADDR3_COPY_MEMSURFACE_REGION* pRegion = &pRegions[idx];

                if (pRegion->size != sizeof(ADDR3_COPY_MEMSURFACE_REGION))
                {
                    status = ADDR_INVALIDPARAMS;
                }
                else if (oneSubres &&
                         ((pRegion->copyDims.depth != 1) ||
                          (pRegion->slice != firstSlice) ||
                          (pRegion->mipId != firstMip)))
                {
                    // The copy would span several (or interleaved) subresources, which a pointer mapped to a
                    // single subresource cannot describe.
                    status = ADDR_INVALIDPARAMS;
                }
            }
        }
        else
        {
            status = ADDR_INVALIDPARAMS;
        }
    }

    if (status == ADDR_OK)
    {
        status = IsLinear(pIn->swizzleMode) ? CopyLinearSurface(pIn, pRegions, regionCount, true)
                                            : HwlCopyMemToSurface(pIn, pRegions, regionCount);
    }

    return status;
}
/**
************************************************************************************************************************
*   Lib::CopySurfaceToMem
*
*   @brief
*       Interface function stub of Addr3CopySurfaceToMem. Rejects an empty or NULL region list, validates structure
*       sizes and single-subresource constraints when size-field filling is enabled, then hands linear surfaces to
*       CopyLinearSurface and every other swizzle mode to HwlCopySurfaceToMem.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::CopySurfaceToMem(
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // pRegions[0] is dereferenced below, so a NULL array must be rejected as well as an empty one
    // (the same guard CopyMemToSurface applies).
    if ((regionCount == 0) || (pRegions == NULL))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }
    else if (GetFillSizeFieldsFlags() == TRUE)
    {
        if (pIn->size != sizeof(ADDR3_COPY_MEMSURFACE_INPUT))
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            UINT_32 baseSlice    = pRegions[0].slice;
            UINT_32 baseMip      = pRegions[0].mipId;
            BOOL_32 singleSubres = pIn->singleSubres;

            for (UINT_32 i = 0; i < regionCount; i++)
            {
                if (pRegions[i].size != sizeof(ADDR3_COPY_MEMSURFACE_REGION))
                {
                    returnCode = ADDR_INVALIDPARAMS;
                    break;
                }

                if (singleSubres &&
                    ((pRegions[i].copyDims.depth != 1) ||
                     (pRegions[i].slice != baseSlice) ||
                     (pRegions[i].mipId != baseMip)))
                {
                    // Copy will cover multiple/interleaved subresources, a
                    // mapped pointer to a single subres cannot be valid.
                    returnCode = ADDR_INVALIDPARAMS;
                    break;
                }
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        if (IsLinear(pIn->swizzleMode))
        {
            // surfaceIsDst = false: the surface is the copy source.
            returnCode = CopyLinearSurface(pIn, pRegions, regionCount, false);
        }
        else
        {
            returnCode = HwlCopySurfaceToMem(pIn, pRegions, regionCount);
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeSurfaceAddrFromCoord
@ -776,14 +1017,21 @@ ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight(
const UINT_32 elementBytes = pIn->bpp >> 3;
// Normal pitch of image data
const UINT_32 pitchAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, TRUE);
const UINT_32 pitchAlignmentElements = pitchAlignmentBytes / elementBytes;
pOut->pitch = PowTwoAlign(pIn->width, pitchAlignmentElements);
UINT_32 pitchAlignmentElements = pOut->blockExtent.width;
UINT_32 pitchSliceAlignmentElements = pOut->blockExtent.width;
// Pitch of image data used for slice sizing (same except for linear images)
const UINT_32 pitchSliceAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, CanTrimLinearPadding(pIn));
const UINT_32 pitchSliceAlignmentElements = pitchSliceAlignmentBytes / elementBytes;
if (IsLinear(pIn->swizzleMode))
{
// Normal pitch of image data
const UINT_32 pitchAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, TRUE);
pitchAlignmentElements = pitchAlignmentBytes / elementBytes;
// Pitch of image data used for slice sizing
const UINT_32 pitchSliceAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, CanTrimLinearPadding(pIn));
pitchSliceAlignmentElements = pitchSliceAlignmentBytes / elementBytes;
}
pOut->pitch = PowTwoAlign(pIn->width, pitchAlignmentElements);
pOut->pitchForSlice = PowTwoAlign(pIn->width, pitchSliceAlignmentElements);
UINT_32 heightAlign = pOut->blockExtent.height;
@ -854,6 +1102,7 @@ ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight(
return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeQbStereoInfo
@ -889,5 +1138,34 @@ VOID Lib::ComputeQbStereoInfo(
pOut->sliceSize <<= 1;
}
/**
************************************************************************************************************************
*   Lib::ComputeSurfaceInfoSanityCheck
*
*   @brief
*       Internal pre-check run before surface info is computed: repackages the surface description as an
*       ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT and asks HwlValidateNonSwModeParams whether it is acceptable.
*
*   @return
*       ADDR_OK when the hardware layer accepts the parameters, ADDR_INVALIDPARAMS otherwise
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck(
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn  ///< [in] input structure
    ) const
{
    ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT params = {};

    params.size         = sizeof(params);
    params.flags        = pIn->flags;
    params.resourceType = pIn->resourceType;
    params.bpp          = pIn->bpp;

    // Dimensions and sample/mip counts are forwarded unchanged.
    params.width        = pIn->width;
    params.height       = pIn->height;
    params.numSlices    = pIn->numSlices;
    params.numMipLevels = pIn->numMipLevels;
    params.numSamples   = pIn->numSamples;

    ADDR_E_RETURNCODE status = ADDR_INVALIDPARAMS;

    if (HwlValidateNonSwModeParams(&params))
    {
        status = ADDR_OK;
    }

    return status;
}
} // V3
} // Addr

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -28,6 +28,7 @@ constexpr UINT_32 Size256 = 256u;
constexpr UINT_32 Size4K = 4 * 1024;
constexpr UINT_32 Size64K = 64 * 1024;
constexpr UINT_32 Size256K = 256 * 1024;
constexpr UINT_32 Addr3MaxMipLevels = 16; // Max Mip Levels across all addr3 chips
struct ADDR3_COORD
{
@ -46,23 +47,6 @@ struct ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT
void* pvAddrParams;
};
/**
************************************************************************************************************************
* @brief Bit setting for swizzle pattern
************************************************************************************************************************
*/
union ADDR_BIT_SETTING
{
struct
{
UINT_16 x;
UINT_16 y;
UINT_16 z;
UINT_16 s;
};
UINT_64 value;
};
/**
************************************************************************************************************************
* @brief Flags for SwizzleModeTable
@ -108,53 +92,6 @@ struct ADDR_SW_PATINFO
UINT_8 nibble4Idx;
};
/**
************************************************************************************************************************
* InitBit
*
* @brief
* Initialize bit setting value via a return value
************************************************************************************************************************
*/
#define InitBit(c, index) (1ull << ((c << 4) + index))
const UINT_64 X0 = InitBit(0, 0);
const UINT_64 X1 = InitBit(0, 1);
const UINT_64 X2 = InitBit(0, 2);
const UINT_64 X3 = InitBit(0, 3);
const UINT_64 X4 = InitBit(0, 4);
const UINT_64 X5 = InitBit(0, 5);
const UINT_64 X6 = InitBit(0, 6);
const UINT_64 X7 = InitBit(0, 7);
const UINT_64 X8 = InitBit(0, 8);
const UINT_64 Y0 = InitBit(1, 0);
const UINT_64 Y1 = InitBit(1, 1);
const UINT_64 Y2 = InitBit(1, 2);
const UINT_64 Y3 = InitBit(1, 3);
const UINT_64 Y4 = InitBit(1, 4);
const UINT_64 Y5 = InitBit(1, 5);
const UINT_64 Y6 = InitBit(1, 6);
const UINT_64 Y7 = InitBit(1, 7);
const UINT_64 Y8 = InitBit(1, 8);
const UINT_64 Z0 = InitBit(2, 0);
const UINT_64 Z1 = InitBit(2, 1);
const UINT_64 Z2 = InitBit(2, 2);
const UINT_64 Z3 = InitBit(2, 3);
const UINT_64 Z4 = InitBit(2, 4);
const UINT_64 Z5 = InitBit(2, 5);
const UINT_64 S0 = InitBit(3, 0);
const UINT_64 S1 = InitBit(3, 1);
const UINT_64 S2 = InitBit(3, 2);
/**
************************************************************************************************************************
* @brief Bit setting for swizzle pattern
************************************************************************************************************************
*/
/**
************************************************************************************************************************
* @brief This class contains asic independent address lib functionalities
@ -168,6 +105,11 @@ public:
static Lib* GetLib(
ADDR_HANDLE hLib);
/// Returns which version of addrlib functions should be used with this library object; this class reports 3.
virtual UINT_32 GetInterfaceVersion() const
{
return 3;
}
//
// Interface stubs
//
@ -185,6 +127,16 @@ public:
const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE CopyMemToSurface(
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
ADDR_E_RETURNCODE CopySurfaceToMem(
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
// Misc
ADDR_E_RETURNCODE ComputePipeBankXor(
const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
@ -212,18 +164,16 @@ protected:
SwizzleModeFlags m_swizzleModeTable[ADDR3_MAX_TYPE]; ///< Swizzle mode table
// Number of unique MSAA sample rates (1/2/4/8)
static const UINT_32 MaxMsaaRateLog2 = 4;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
static const UINT_32 MaxNumMsaaRates = 4;
// Number of equation entries in the table
UINT_32 m_numEquations;
// Swizzle equation lookup table according to swizzle mode, MSAA sample rate and bpp. This does not include linear.
UINT_32 m_equationLookupTable[ADDR3_MAX_TYPE - 1][MaxMsaaRateLog2][MaxElementBytesLog2];
UINT_32 m_equationLookupTable[ADDR3_MAX_TYPE - 1][MaxNumMsaaRates][MaxElementBytesLog2];
// Block dimension lookup table according to swizzle mode, MSAA sample rate and bpp. This includes linear.
ADDR_EXTENT3D m_blockDimensionTable[ADDR3_MAX_TYPE][MaxMsaaRateLog2][MaxElementBytesLog2];
ADDR_EXTENT3D m_blockDimensionTable[ADDR3_MAX_TYPE][MaxNumMsaaRates][MaxElementBytesLog2];
virtual ADDR_E_RETURNCODE HwlComputeStereoInfo(
const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
@ -333,7 +283,7 @@ protected:
// The max alignment is tied to the swizzle mode; since the largest swizzle mode block is 256KB, the maximal
// alignment is also 256KB.
virtual UINT_32 HwlComputeMaxBaseAlignments() const { return Size256K; }
virtual UINT_32 HwlComputeMaxBaseAlignments() const { return Size256K; }
virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes(
const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn,
@ -357,6 +307,24 @@ protected:
return ADDR_NOTSUPPORTED;
}
/// Default hardware-layer hook for memory-to-surface copies; CopyMemToSurface calls it for non-linear swizzle
/// modes. This base version performs no copy: it invokes ADDR_NOT_IMPLEMENTED() and returns ADDR_NOTSUPPORTED.
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
/// Default hardware-layer hook for surface-to-memory copies; CopySurfaceToMem calls it for non-linear swizzle
/// modes. This base version performs no copy: it invokes ADDR_NOT_IMPLEMENTED() and returns ADDR_NOTSUPPORTED.
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
@ -373,6 +341,12 @@ protected:
const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
const ADDR_EXTENT3D& blockDims) const;
ADDR_E_RETURNCODE CopyLinearSurface(
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount,
bool surfaceIsDst) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear(
const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
@ -457,6 +431,8 @@ protected:
virtual BOOL_32 HwlValidateNonSwModeParams(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn) const = 0;
ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck(const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
private:
// Disallow the copy constructor
Lib(const Lib& a);

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -0,0 +1,441 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
***********************************************************************************************************************/
/**
****************************************************************************************************
* @file addrswizzler.cpp
* @brief Contains code for efficient CPU swizzling.
****************************************************************************************************
*/
#include "addrswizzler.h"
namespace Addr
{
/**
****************************************************************************************************
*   LutAddresser::LutAddresser
*
*   @brief
*       Constructor for the LutAddresser class. Leaves the object in an "empty" state: every LUT
*       pointer refers to m_lutData[0] (which is zero), every mask is zero and the maximum X
*       expansion is 1 (a no-op), so the accessors are safe to call even before Init().
****************************************************************************************************
*/
LutAddresser::LutAddresser()
    :
    m_pXLut(&m_lutData[0]),
    m_pYLut(&m_lutData[0]),
    m_pZLut(&m_lutData[0]),
    m_pSLut(&m_lutData[0]),
    m_xLutMask(0),
    m_yLutMask(0),
    m_zLutMask(0),
    m_sLutMask(0),
    m_blockBits(0),
    m_blockSize(),
    // Previously left uninitialized; GetCopyMemImgFunc()/GetCopyImgMemFunc() read it.
    m_maxExpandX(1),
    m_bpeLog2(0),
    m_bit(),
    m_lutData()
{
}
/**
****************************************************************************************************
* LutAddresser::Init
*
* @brief
* Calculates general properties about the swizzle
****************************************************************************************************
*/
void LutAddresser::Init(
const ADDR_BIT_SETTING* pEq,
UINT_32 eqSize,
ADDR_EXTENT3D blockSize,
UINT_8 blockBits)
{
ADDR_ASSERT(eqSize <= ADDR_MAX_EQUATION_BIT);
memcpy(&m_bit[0], pEq, sizeof(ADDR_BIT_SETTING) * eqSize);
m_blockSize = blockSize;
m_blockBits = blockBits;
InitSwizzleProps();
InitLuts();
}
/**
****************************************************************************************************
*   LutAddresser::InitSwizzleProps
*
*   @brief
*       Calculates general properties about the swizzle from m_bit: the element size (m_bpeLog2),
*       the per-channel LUT masks and the maximum horizontal expansion (m_maxExpandX).
****************************************************************************************************
*/
void LutAddresser::InitSwizzleProps()
{
    // Calculate BPE from the swizzle. This can be derived from the number of invalid low bits.
    m_bpeLog2 = 0;
    for (UINT_32 i = 0; i < MaxElementBytesLog2; i++)
    {
        if (m_bit[i].value != 0)
        {
            break;
        }
        m_bpeLog2++;
    }

    // The masks are accumulated with OR below, so start from a clean slate. Without this reset a
    // second Init() on the same object would keep the previous equation's mask bits.
    m_xLutMask = 0;
    m_yLutMask = 0;
    m_zLutMask = 0;
    m_sLutMask = 0;

    // Generate a mask/size for each channel's LUT. This may be larger than the block size.
    // If a given 'source' bit (eg. 'x0') is used for any part of the equation, fill that in the mask.
    for (UINT_32 i = 0; i < ADDR_MAX_EQUATION_BIT; i++)
    {
        m_xLutMask |= m_bit[i].x;
        m_yLutMask |= m_bit[i].y;
        m_zLutMask |= m_bit[i].z;
        m_sLutMask |= m_bit[i].s;
    }

    // An expandX of 1 is a no-op
    m_maxExpandX = 1;
    if (m_sLutMask == 0)
    {
        // Calculate expandX from the swizzle. This can be derived from the number of consecutive,
        // increasing low x bits
        for (UINT_32 i = 0; i < 3; i++)
        {
            const auto& curBit = m_bit[m_bpeLog2 + i];
            ADDR_ASSERT(curBit.value != 0);
            if ((IsPow2(curBit.value) == false) || // More than one bit contributes
                (curBit.x == 0)                 || // Bit is from Y/Z/S channel
                (curBit.x != m_maxExpandX))        // X bits are out of order
            {
                break;
            }
            m_maxExpandX *= 2;
        }
    }
}
/**
****************************************************************************************************
* LutAddresser::InitLuts
*
* @brief
* Creates lookup tables for each channel.
****************************************************************************************************
*/
void LutAddresser::InitLuts()
{
UINT_32 curOffset = 0;
m_pXLut = &m_lutData[0];
for (UINT_32 x = 0; x < (m_xLutMask + 1); x++)
{
m_pXLut[x] = EvalEquation(x, 0, 0, 0);
}
curOffset += m_xLutMask + 1;
ADDR_ASSERT(curOffset <= MaxLutSize);
if (m_yLutMask != 0)
{
m_pYLut = &m_lutData[curOffset];
for (UINT_32 y = 0; y < (m_yLutMask + 1); y++)
{
m_pYLut[y] = EvalEquation(0, y, 0, 0);
}
curOffset += m_yLutMask + 1;
ADDR_ASSERT(curOffset <= MaxLutSize);
}
else
{
m_pYLut = &m_lutData[0];
ADDR_ASSERT(m_pYLut[0] == 0);
}
if (m_zLutMask != 0)
{
m_pZLut = &m_lutData[curOffset];
for (UINT_32 z = 0; z < (m_zLutMask + 1); z++)
{
m_pZLut[z] = EvalEquation(0, 0, z, 0);
}
curOffset += m_zLutMask + 1;
ADDR_ASSERT(curOffset <= MaxLutSize);
}
else
{
m_pZLut = &m_lutData[0];
ADDR_ASSERT(m_pZLut[0] == 0);
}
if (m_sLutMask != 0)
{
m_pSLut = &m_lutData[curOffset];
for (UINT_32 s = 0; s < (m_sLutMask + 1); s++)
{
m_pSLut[s] = EvalEquation(0, 0, 0, s);
}
curOffset += m_sLutMask + 1;
ADDR_ASSERT(curOffset <= MaxLutSize);
}
else
{
m_pSLut = &m_lutData[0];
ADDR_ASSERT(m_pSLut[0] == 0);
}
}
/**
****************************************************************************************************
*   LutAddresser::EvalEquation
*
*   @brief
*       Evaluates the stored equation for one coordinate bit by bit, without using the LUTs.
*       Every output bit is the XOR of all coordinate bits selected by that bit's x/y/z/s masks.
****************************************************************************************************
*/
UINT_32 LutAddresser::EvalEquation(
    UINT_32 x,
    UINT_32 y,
    UINT_32 z,
    UINT_32 s)
{
    // XORs together the bits of 'coord' selected by 'srcMask' and places the result at 'dstBit'.
    // A zero coordinate contributes nothing, so the mask walk is skipped entirely.
    const auto channelBits = [](UINT_32 coord, UINT_32 srcMask, UINT_32 dstBit) -> UINT_32
    {
        UINT_32 contribution = 0;
        if (coord != 0)
        {
            for (UINT_32 remaining = srcMask; remaining != 0; remaining = UnsetLeastBit(remaining))
            {
                const UINT_32 srcIdx = BitScanForward(remaining);
                contribution ^= (((coord >> srcIdx) & 1) << dstBit);
            }
        }
        return contribution;
    };

    UINT_32 result = 0;

    for (UINT_32 bit = 0; bit < ADDR_MAX_EQUATION_BIT; bit++)
    {
        if (m_bit[bit].value == 0)
        {
            if (result != 0)
            {
                // Invalid bits at the top of the equation
                break;
            }
            // Nothing accumulated yet: skip this unused bit and keep scanning
            continue;
        }

        result ^= channelBits(x, m_bit[bit].x, bit);
        result ^= channelBits(y, m_bit[bit].y, bit);
        result ^= channelBits(z, m_bit[bit].z, bit);
        result ^= channelBits(s, m_bit[bit].s, bit);
    }

    return result;
}
/**
****************************************************************************************************
* Copy2DSliceUnaligned
*
* @brief
* Copies an arbitrary 2D pixel region to or from a surface.
****************************************************************************************************
*/
template <int BPELog2, int ExpandX, bool ImgIsDest>
void Copy2DSliceUnaligned(
void* pImgBlockSliceStart, // Block corresponding to beginning of slice
void* pBuf, // Pointer to data starting from the copy origin.
size_t bufStrideY, // Stride of each row in pBuf
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
ADDR_COORD2D origin, // Absolute origin, in elements
ADDR_EXTENT2D extent, // Size to copy, in elements
UINT_32 sliceXor, // Includes pipeBankXor and z XOR
const LutAddresser& addresser)
{
UINT_32 xStart = origin.x;
UINT_32 xEnd = origin.x + extent.width;
constexpr UINT_32 PixBytes = (1 << BPELog2);
// Apply a negative offset now so later code can do eg. pBuf[x] instead of pBuf[x - origin.x]
pBuf = VoidPtrDec(pBuf, xStart * PixBytes);
// Do things one row at a time for unaligned regions.
for (UINT_32 y = origin.y; y < (origin.y + extent.height); y++)
{
UINT_32 yBlk = (y >> addresser.GetBlockYBits()) * imageBlocksY;
UINT_32 rowXor = sliceXor ^ addresser.GetAddressY(y);
UINT_32 x = xStart;
// Most swizzles pack 2-4 pixels horizontally. Take advantage of this even in non-microblock-aligned
// regions to commonly do 2-4x less work. This is still way less good than copying by whole microblocks though.
if (ExpandX > 1)
{
// Unaligned left edge
for (; x < Min(xEnd, PowTwoAlign(xStart, ExpandX)); x++)
{
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
if (ImgIsDest)
{
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes);
}
else
{
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes);
}
}
// Aligned middle
for (; x < PowTwoAlignDown(xEnd, ExpandX); x += ExpandX)
{
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
if (ImgIsDest)
{
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes * ExpandX);
}
else
{
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes * ExpandX);
}
}
}
// Unaligned end (or the whole thing when ExpandX == 1)
for (; x < xEnd; x++)
{
// Get the index of the block within the slice
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
// Apply that index to get the base address of the current block.
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
// Grab the x-xor and XOR it all together, adding to get the final address
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
if (ImgIsDest)
{
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes);
}
else
{
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes);
}
}
pBuf = VoidPtrInc(pBuf, bufStrideY);
}
}
/**
****************************************************************************************************
*   LutAddresser::GetCopyMemImgFunc
*
*   @brief
*       Determines and returns which copy function to use for copying to images
*
*   @return
*       The Copy2DSliceUnaligned instantiation matching m_bpeLog2 and m_maxExpandX, or nullptr when
*       m_bpeLog2 is outside the table.
****************************************************************************************************
*/
UnalignedCopyMemImgFunc LutAddresser::GetCopyMemImgFunc() const
{
    // While these are all the same function, the codegen gets really bad if the size of each pixel
    // is not known at compile time. Hence, templates.
    // The table is static so it is built once instead of on every call.
    static const UnalignedCopyMemImgFunc Funcs[MaxElementBytesLog2][3] =
    {
        // ExpandX = 1, 2, 4
        { Copy2DSliceUnaligned<0, 1, true>, Copy2DSliceUnaligned<0, 2, true>, Copy2DSliceUnaligned<0, 4, true> }, // 1BPE
        { Copy2DSliceUnaligned<1, 1, true>, Copy2DSliceUnaligned<1, 2, true>, Copy2DSliceUnaligned<1, 4, true> }, // 2BPE
        { Copy2DSliceUnaligned<2, 1, true>, Copy2DSliceUnaligned<2, 2, true>, Copy2DSliceUnaligned<2, 4, true> }, // 4BPE
        { Copy2DSliceUnaligned<3, 1, true>, Copy2DSliceUnaligned<3, 2, true>, Copy2DSliceUnaligned<3, 4, true> }, // 8BPE
        { Copy2DSliceUnaligned<4, 1, true>, Copy2DSliceUnaligned<4, 2, true>, Copy2DSliceUnaligned<4, 4, true> }, // 16BPE
    };

    UnalignedCopyMemImgFunc pfnRet = nullptr;

    ADDR_ASSERT(m_bpeLog2 < MaxElementBytesLog2);

    // InitSwizzleProps() can leave m_bpeLog2 equal to MaxElementBytesLog2 when none of the low
    // equation bits is valid. Return nullptr in that case instead of indexing past the table.
    if (m_bpeLog2 < MaxElementBytesLog2)
    {
        if (m_maxExpandX >= 4)
        {
            pfnRet = Funcs[m_bpeLog2][2];
        }
        else if (m_maxExpandX >= 2)
        {
            pfnRet = Funcs[m_bpeLog2][1];
        }
        else
        {
            pfnRet = Funcs[m_bpeLog2][0];
        }
    }

    return pfnRet;
}
/**
****************************************************************************************************
*   LutAddresser::GetCopyImgMemFunc
*
*   @brief
*       Determines and returns which copy function to use for copying from images
*
*   @return
*       The Copy2DSliceUnaligned instantiation matching m_bpeLog2 and m_maxExpandX, or nullptr when
*       m_bpeLog2 is outside the table.
****************************************************************************************************
*/
UnalignedCopyMemImgFunc LutAddresser::GetCopyImgMemFunc() const
{
    // While these are all the same function, the codegen gets really bad if the size of each pixel
    // is not known at compile time. Hence, templates.
    // The table is static so it is built once instead of on every call.
    static const UnalignedCopyMemImgFunc Funcs[MaxElementBytesLog2][3] =
    {
        // ExpandX = 1, 2, 4
        { Copy2DSliceUnaligned<0, 1, false>, Copy2DSliceUnaligned<0, 2, false>, Copy2DSliceUnaligned<0, 4, false> }, // 1BPE
        { Copy2DSliceUnaligned<1, 1, false>, Copy2DSliceUnaligned<1, 2, false>, Copy2DSliceUnaligned<1, 4, false> }, // 2BPE
        { Copy2DSliceUnaligned<2, 1, false>, Copy2DSliceUnaligned<2, 2, false>, Copy2DSliceUnaligned<2, 4, false> }, // 4BPE
        { Copy2DSliceUnaligned<3, 1, false>, Copy2DSliceUnaligned<3, 2, false>, Copy2DSliceUnaligned<3, 4, false> }, // 8BPE
        { Copy2DSliceUnaligned<4, 1, false>, Copy2DSliceUnaligned<4, 2, false>, Copy2DSliceUnaligned<4, 4, false> }, // 16BPE
    };

    UnalignedCopyMemImgFunc pfnRet = nullptr;

    ADDR_ASSERT(m_bpeLog2 < MaxElementBytesLog2);

    // InitSwizzleProps() can leave m_bpeLog2 equal to MaxElementBytesLog2 when none of the low
    // equation bits is valid. Return nullptr in that case instead of indexing past the table.
    if (m_bpeLog2 < MaxElementBytesLog2)
    {
        if (m_maxExpandX >= 4)
        {
            pfnRet = Funcs[m_bpeLog2][2];
        }
        else if (m_maxExpandX >= 2)
        {
            pfnRet = Funcs[m_bpeLog2][1];
        }
        else
        {
            pfnRet = Funcs[m_bpeLog2][0];
        }
    }

    return pfnRet;
}
}

View file

@ -0,0 +1,119 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
***********************************************************************************************************************/
/**
****************************************************************************************************
* @file addrswizzler.h
* @brief Contains code for efficient CPU swizzling.
****************************************************************************************************
*/
#ifndef __ADDR_SWIZZLER_H__
#define __ADDR_SWIZZLER_H__
#include "addrlib.h"
#include "addrcommon.h"
namespace Addr
{
// Forward decl
class LutAddresser;
// Signature shared by the per-format routines that copy one 2D slice region between a linear buffer
// and a swizzled image. The same pointer type is used for both copy directions; the direction is a
// property of the function the pointer refers to, not of the signature.
typedef void (*UnalignedCopyMemImgFunc)(
void* pImgBlockSliceStart, // Block corresponding to beginning of slice
void* pBuf, // Pointer to data starting from the copy origin.
size_t bufStrideY, // Stride of each row in pBuf
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
ADDR_COORD2D origin, // Absolute origin, in elements
ADDR_EXTENT2D extent, // Size to copy, in elements
UINT_32 sliceXor, // Includes pipeBankXor and z XOR
const LutAddresser& addresser);
// This class calculates and holds up to four lookup tables (x/y/z/s) which can be used to cheaply calculate the
// position of a pixel within a block at the cost of some precomputation and memory usage.
//
// This works for all equations and does something like this:
//     offset = blockAddr ^ XLut[x & xMask] ^ YLut[y & yMask]...
//
// NOTE(review): the LUT pointers refer into this object's own m_lutData, so a memberwise copy keeps
// pointing at the source object's storage until Init() is called on the copy. Pass by reference.
class LutAddresser
{
public:
    constexpr static UINT_32 MaxLutSize = 2100; // Sized to fit the largest non-VAR LUT size

    LutAddresser();

    // Stores the equation (eqSize entries of pEq) and block geometry, then builds the LUTs.
    void Init(const ADDR_BIT_SETTING* pEq, UINT_32 eqSize, ADDR_EXTENT3D blockSize, UINT_8 blkBits);

    // Does a full calculation to get the offset within a block. Takes an *absolute* coordinate,
    // not the coordinate within the block.
    // Const so it can be used through the 'const LutAddresser&' handed to the copy routines.
    UINT_32 GetBlockOffset(
        UINT_32 x,
        UINT_32 y,
        UINT_32 z,
        UINT_32 s = 0,
        UINT_32 pipeBankXor = 0) const
    {
        return GetAddressX(x) ^ GetAddressY(y) ^ GetAddressZ(z) ^ GetAddressS(s) ^ pipeBankXor;
    }

    // Get the block size
    UINT_32 GetBlockBits() const { return m_blockBits; }
    UINT_32 GetBlockXBits() const { return Log2(m_blockSize.width); }
    UINT_32 GetBlockYBits() const { return Log2(m_blockSize.height); }
    UINT_32 GetBlockZBits() const { return Log2(m_blockSize.depth); }

    // "Fast single channel" functions to get the part that each channel contributes to be XORd together.
    UINT_32 GetAddressX(UINT_32 x) const { return m_pXLut[x & m_xLutMask];}
    UINT_32 GetAddressY(UINT_32 y) const { return m_pYLut[y & m_yLutMask];}
    UINT_32 GetAddressZ(UINT_32 z) const { return m_pZLut[z & m_zLutMask];}
    UINT_32 GetAddressS(UINT_32 s) const { return m_pSLut[s & m_sLutMask];}

    // Get a function that can copy a single 2D slice of an image with this swizzle.
    // GetCopyMemImgFunc: memory -> image, GetCopyImgMemFunc: image -> memory.
    UnalignedCopyMemImgFunc GetCopyMemImgFunc() const;
    UnalignedCopyMemImgFunc GetCopyImgMemFunc() const;

private:
    // Calculate general properties of the swizzle equations
    void InitSwizzleProps();
    // Fills a LUT for each channel.
    void InitLuts();
    // Evaluate coordinate without LUTs
    UINT_32 EvalEquation(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s);

    // Pointers within m_lutData corresponding to where each LUT starts
    // m_lutData[0] always has a value of 0 and thus can be considered an empty 1-entry LUT for "don't care" channels
    UINT_32* m_pXLut;
    UINT_32* m_pYLut;
    UINT_32* m_pZLut;
    UINT_32* m_pSLut;

    // Size of each LUT, minus 1 to form a mask. A mask of 0 is valid for an empty LUT.
    UINT_32 m_xLutMask;
    UINT_32 m_yLutMask;
    UINT_32 m_zLutMask;
    UINT_32 m_sLutMask;

    // Number of bits in the block (aka Log2(blkSize))
    UINT_32 m_blockBits;

    // The block size
    ADDR_EXTENT3D m_blockSize;

    // Number of 'x' bits at the bottom of the equation. Must be a pow2 and at least 1.
    // This will be used as a simple optimization to batch together operations on adjacent x pixels.
    UINT_32 m_maxExpandX;

    // BPE for this equation.
    UINT_32 m_bpeLog2;

    // The full equation
    ADDR_BIT_SETTING m_bit[ADDR_MAX_EQUATION_BIT];

    // Backing store for the LUT tables.
    UINT_32 m_lutData[MaxLutSize];
};
}
#endif // __ADDR_SWIZZLER_H__

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -3721,6 +3721,7 @@ const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO[] =
{ 3, 27, 344, 365, 124, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
};
const UINT_64 GFX10_SW_PATTERN_NIBBLE01[][8] =
{
{X0, X1, X2, X3, Y0, Y1, Y2, Y3, }, // 0

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -41,8 +41,7 @@ struct Gfx10ChipSettings
UINT_32 supportRbPlus : 1;
UINT_32 dsMipmapHtileFix : 1;
UINT_32 dccUnsup3DSwDis : 1;
UINT_32 : 4;
UINT_32 reserved2 : 24;
UINT_32 reserved2 : 28;
};
};
@ -142,7 +141,6 @@ const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10Displa
const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask |
Gfx10BlkVarSwModeMask;
@ -155,8 +153,7 @@ const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10
const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask;
const UINT_32 Gfx10MsaaSwModeMask = (Gfx10ZSwModeMask |
Gfx10RenderSwModeMask)
;
Gfx10RenderSwModeMask);
const UINT_32 Dcn20NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) |
@ -299,6 +296,10 @@ protected:
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes(
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
@ -313,6 +314,16 @@ protected:
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
@ -342,14 +353,6 @@ private:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
UINT_32 ComputeOffsetFromSwizzlePattern(
const UINT_64* pPattern,
UINT_32 numBits,
UINT_32 x,
UINT_32 y,
UINT_32 z,
UINT_32 s) const;
UINT_32 ComputeOffsetFromEquation(
const ADDR_EQUATION* pEq,
UINT_32 x,
@ -393,7 +396,7 @@ private:
*/
VOID GetSwizzlePatternFromPatternInfo(
const ADDR_SW_PATINFO* pPatInfo,
ADDR_BIT_SETTING (&pSwizzle)[20]) const
ADDR_BIT_SETTING (&pSwizzle)[ADDR_MAX_EQUATION_BIT]) const
{
memcpy(pSwizzle,
GFX10_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -15,6 +15,7 @@
#include "gfx11addrlib.h"
#include "gfx11_gb_reg.h"
#include "addrswizzler.h"
#include "amdgpu_asic_addr.h"
@ -1874,7 +1875,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
if (pPatInfo != NULL)
{
ADDR_BIT_SETTING fullSwizzlePattern[20];
ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
const UINT_32 pipeBankXorOffset =
@ -2751,7 +2752,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
}
// Select the biggest allowed block type
minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
minSizeBlk = Log2(allowedBlockSet.value) + 1;
if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
{
@ -2897,7 +2898,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
// Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
// swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
}
}
else
@ -3690,6 +3691,245 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
return ret;
}
/**
************************************************************************************************************************
*   Gfx11Lib::HwlCopyMemToSurface
*
*   @brief
*       Copy multiple regions from memory to a non-linear surface.
*
*   @return
*       ADDR_OK on success. ADDR_NOTIMPLEMENTED for MSAA or variable-block swizzle modes.
*       ADDR_INVALIDPARAMS when the mip count or a region's mipId is out of range, when no swizzle
*       pattern exists for the surface, or when no copy routine matches the swizzle. Otherwise the
*       error reported by ComputeSurfaceInfo. Nothing is copied unless ADDR_OK is returned.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlCopyMemToSurface(
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount
    ) const
{
    // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
    // optimized for a particular micro-swizzle mode if available.
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->numMipLevels > MaxMipLevels)
    {
        // mipInfo[] is handed to ComputeSurfaceInfo as pMipInfo; refuse a mip count it cannot hold.
        returnCode = ADDR_INVALIDPARAMS;
    }

    if (pIn->numSamples > 1)
    {
        // TODO: MSAA
        returnCode = ADDR_NOTIMPLEMENTED;
    }

    if (IsBlockVariable(pIn->swizzleMode))
    {
        // TODO: larger LUTs for worst-case 256KB swizzle.
        returnCode = ADDR_NOTIMPLEMENTED;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    if (returnCode == ADDR_OK)
    {
        // Validate every region up front so that a bad region cannot leave a partially copied surface.
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            if (pRegions[regionIdx].mipId >= localIn.numMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }
        }
    }

    LutAddresser            addresser;
    UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;

    // Only build the addresser once the surface description is known to be valid: localOut is still
    // zeroed after a failure above.
    if (returnCode == ADDR_OK)
    {
        const UINT_32          blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_SW_PATINFO* pPatInfo    = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                   pIn->resourceType,
                                                                   Log2(pIn->bpp >> 3),
                                                                   pIn->numSamples);

        if (pPatInfo == NULL)
        {
            // GetSwizzlePatternFromPatternInfo dereferences the pattern info unconditionally.
            ADDR_ASSERT_ALWAYS();
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
            GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

            ADDR_EXTENT3D blockExtent = {
                localOut.blockWidth,
                localOut.blockHeight,
                localOut.blockSlices
            };

            addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
            pfnCopyUnaligned = addresser.GetCopyMemImgFunc();

            if (pfnCopyUnaligned == nullptr)
            {
                ADDR_ASSERT_ALWAYS();
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
            const ADDR2_MIP_INFO*               pMipInfo   = &mipInfo[pCurRegion->mipId];

            UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
            UINT_32 yBlks     = pMipInfo->pitch / localOut.blockWidth;

            UINT_32 xStart     = pCurRegion->x + pMipInfo->mipTailCoordX;
            UINT_32 yStart     = pCurRegion->y + pMipInfo->mipTailCoordY;
            UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;

            for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
            {
                // The copy functions take the base address of the hardware slice, not the logical slice. Those are
                // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
                // for unaligned copies.
                UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
                UINT_32 sliceXor      = pIn->pbXor ^ addresser.GetAddressZ(slice);

                // The memory side is indexed relative to the first slice of this region. The mip-tail
                // Z offset only applies to the image side, so it must not be part of memOffset.
                UINT_64 memOffset     = (static_cast<UINT_64>(slice - sliceStart) * pCurRegion->memSlicePitch);
                UINT_64 imgOffset     = mipOffset + (sliceBlkStart * localOut.sliceSize);

                ADDR_COORD2D  sliceOrigin = { xStart, yStart };
                ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };

                pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
                                 VoidPtrInc(pCurRegion->pMem, memOffset),
                                 pCurRegion->memRowPitch,
                                 yBlks,
                                 sliceOrigin,
                                 sliceExtent,
                                 sliceXor,
                                 addresser);
            }
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
*   Gfx11Lib::HwlCopySurfaceToMem
*
*   @brief
*       Copy multiple regions from a non-linear surface to memory.
*
*   @return
*       ADDR_OK on success. ADDR_NOTIMPLEMENTED for MSAA or variable-block swizzle modes.
*       ADDR_INVALIDPARAMS when the mip count or a region's mipId is out of range, when no swizzle
*       pattern exists for the surface, or when no copy routine matches the swizzle. Otherwise the
*       error reported by ComputeSurfaceInfo. Nothing is copied unless ADDR_OK is returned.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlCopySurfaceToMem(
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount
    ) const
{
    // Copy tiled surface to memory. We will use the 'swizzler' object to dispatch to a version of the copy routine
    // optimized for a particular micro-swizzle mode if available.
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->numMipLevels > MaxMipLevels)
    {
        // mipInfo[] is handed to ComputeSurfaceInfo as pMipInfo; refuse a mip count it cannot hold.
        returnCode = ADDR_INVALIDPARAMS;
    }

    if (pIn->numSamples > 1)
    {
        // TODO: MSAA
        returnCode = ADDR_NOTIMPLEMENTED;
    }

    if (IsBlockVariable(pIn->swizzleMode))
    {
        // TODO: larger LUTs for worst-case 256KB swizzle.
        returnCode = ADDR_NOTIMPLEMENTED;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    if (returnCode == ADDR_OK)
    {
        // Validate every region up front so that a bad region cannot leave a partially filled buffer.
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            if (pRegions[regionIdx].mipId >= localIn.numMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }
        }
    }

    LutAddresser            addresser;
    UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;

    // Only build the addresser once the surface description is known to be valid: localOut is still
    // zeroed after a failure above.
    if (returnCode == ADDR_OK)
    {
        const UINT_32          blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_SW_PATINFO* pPatInfo    = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                   pIn->resourceType,
                                                                   Log2(pIn->bpp >> 3),
                                                                   pIn->numSamples);

        if (pPatInfo == NULL)
        {
            // GetSwizzlePatternFromPatternInfo dereferences the pattern info unconditionally.
            ADDR_ASSERT_ALWAYS();
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
            GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

            ADDR_EXTENT3D blockExtent = {
                localOut.blockWidth,
                localOut.blockHeight,
                localOut.blockSlices
            };

            addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
            pfnCopyUnaligned = addresser.GetCopyImgMemFunc();

            if (pfnCopyUnaligned == nullptr)
            {
                ADDR_ASSERT_ALWAYS();
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
            const ADDR2_MIP_INFO*               pMipInfo   = &mipInfo[pCurRegion->mipId];

            UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
            UINT_32 yBlks     = pMipInfo->pitch / localOut.blockWidth;

            UINT_32 xStart     = pCurRegion->x + pMipInfo->mipTailCoordX;
            UINT_32 yStart     = pCurRegion->y + pMipInfo->mipTailCoordY;
            UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;

            for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
            {
                // The copy functions take the base address of the hardware slice, not the logical slice. Those are
                // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
                // for unaligned copies.
                UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
                UINT_32 sliceXor      = pIn->pbXor ^ addresser.GetAddressZ(slice);

                // The memory side is indexed relative to the first slice of this region. The mip-tail
                // Z offset only applies to the image side, so it must not be part of memOffset.
                UINT_64 memOffset     = (static_cast<UINT_64>(slice - sliceStart) * pCurRegion->memSlicePitch);
                UINT_64 imgOffset     = mipOffset + (sliceBlkStart * localOut.sliceSize);

                ADDR_COORD2D  sliceOrigin = { xStart, yStart };
                ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };

                pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
                                 VoidPtrInc(pCurRegion->pMem, memOffset),
                                 pCurRegion->memRowPitch,
                                 yBlks,
                                 sliceOrigin,
                                 sliceExtent,
                                 sliceXor,
                                 addresser);
            }
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
* Gfx11Lib::ComputeOffsetFromEquation
@ -3740,107 +3980,6 @@ UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
return offset;
}
/**
************************************************************************************************************************
*   Gfx11Lib::ComputeOffsetFromSwizzlePattern
*
*   @brief
*       Evaluate a swizzle pattern at one coordinate. Each of the numBits output bits is the XOR of
*       the x/y/z/s coordinate bits selected by the corresponding pattern entry.
*
*   @return
*       Offset
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::ComputeOffsetFromSwizzlePattern(
    const UINT_64* pPattern,    ///< Swizzle pattern
    UINT_32        numBits,     ///< Number of bits in pattern
    UINT_32        x,           ///< x coord in pixel
    UINT_32        y,           ///< y coord in pixel
    UINT_32        z,           ///< z coord in slice
    UINT_32        s            ///< sample id
    ) const
{
    // The pattern words are read through the per-channel bit-setting view
    const ADDR_BIT_SETTING* pBits = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);

    // XOR of all bits of 'coord' whose position is set in 'mask'
    const auto selectedParity = [](UINT_16 mask, UINT_32 coord) -> UINT_32
    {
        UINT_32 parity = 0;
        for (; mask != 0; mask >>= 1, coord >>= 1)
        {
            if (mask & 1)
            {
                parity ^= coord & 1;
            }
        }
        return parity;
    };

    UINT_32 result = 0;

    for (UINT_32 bit = 0; bit < numBits; bit++)
    {
        const UINT_32 bitValue = selectedParity(pBits[bit].x, x) ^
                                 selectedParity(pBits[bit].y, y) ^
                                 selectedParity(pBits[bit].z, z) ^
                                 selectedParity(pBits[bit].s, s);

        result |= (bitValue << bit);
    }

    return result;
}
/**
************************************************************************************************************************
* Gfx11Lib::GetSwizzlePatternInfo
@ -4200,7 +4339,7 @@ ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
const UINT_32 xb = pIn->x / localOut.blockWidth;
const UINT_64 blkIdx = yb * pb + xb;
ADDR_BIT_SETTING fullSwizzlePattern[20];
ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
const UINT_32 blkOffset =

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -300,6 +300,16 @@ protected:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
// Hardware-layer hook taking a surface description (pIn) and an array of regionCount copy regions (pRegions).
// NOTE(review): the definition is not visible here; going by the name the copy direction is memory -> surface,
// and the return value is an ADDR_E_RETURNCODE status -- confirm against the implementation.
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
// Hardware-layer hook taking a surface description (pIn) and an array of regionCount copy regions (pRegions).
// NOTE(review): the definition is not visible here; going by the name the copy direction is surface -> memory,
// and the return value is an ADDR_E_RETURNCODE status -- confirm against the implementation.
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const;
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
@ -328,14 +338,6 @@ private:
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
// Evaluates a swizzle pattern: pPattern is read as numBits ADDR_BIT_SETTING entries, and bit i of the returned
// offset is the XOR of the x/y/z/s coordinate bits selected by the masks of entry i.
UINT_32 ComputeOffsetFromSwizzlePattern(
const UINT_64* pPattern,
UINT_32 numBits,
UINT_32 x,
UINT_32 y,
UINT_32 z,
UINT_32 s) const;
UINT_32 ComputeOffsetFromEquation(
const ADDR_EQUATION* pEq,
UINT_32 x,
@ -373,7 +375,7 @@ private:
VOID GetSwizzlePatternFromPatternInfo(
const ADDR_SW_PATINFO* pPatInfo,
ADDR_BIT_SETTING (&pSwizzle)[20]) const
ADDR_BIT_SETTING (&pSwizzle)[ADDR_MAX_EQUATION_BIT]) const
{
memcpy(pSwizzle,
GFX11_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -15,6 +15,7 @@
#include "gfx12addrlib.h"
#include "gfx12_gb_reg.h"
#include "addrswizzler.h"
#include "amdgpu_asic_addr.h"
@ -187,7 +188,7 @@ VOID Gfx12Lib::InitEquationTable()
// Skip linear equation (data table is not useful for 2D/3D images-- only contains x-coordinate bits)
if (IsValidSwMode(swMode) && (IsLinear(swMode) == false))
{
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
{
@ -266,7 +267,7 @@ VOID Gfx12Lib::InitBlockDimensionTable()
if (IsValidSwMode(swMode))
{
surfaceInfo.swizzleMode = swMode;
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
{
@ -621,10 +622,9 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo(
// Slices must be exact multiples of the block sizes. However:
// - with 3D images, one block will contain multiple slices, so that needs to be taken into account.
//
// Note that with linear images that have only one slice, we can always guarantee pOut->sliceSize is 256B
// alignment so there is no need to worry about it.
ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) % GetBlockSize(pSurfInfo->swizzleMode)) == 0);
// - with linear images that have only one slice, we may trim and use the pitch alignment for size.
ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) %
GetBlockSize(pSurfInfo->swizzleMode, CanTrimLinearPadding(pSurfInfo))) == 0);
}
return returnCode;
@ -880,6 +880,231 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
return ret;
}
/**
************************************************************************************************************************
*   Gfx12Lib::HwlCopyMemToSurface
*
*   @brief
*       Copy multiple regions from memory to a non-linear surface.
*
*   @return
*       ADDR_OK on success. ADDR_NOTIMPLEMENTED when pIn->numSamples > 1 (MSAA is not handled yet).
*       ADDR_INVALIDPARAMS when the mip count or a region's mipId does not fit in MaxMipLevels, or when no swizzle
*       pattern / copy routine is available for the surface. Errors from ComputeSurfaceInfo are passed through.
*       No region is copied unless every check has passed.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,          ///< [in] Description of the mapped surface
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,     ///< [in] Regions to copy
    UINT_32                             regionCount   ///< [in] Number of entries in pRegions
    ) const
{
    // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
    // optimized for a particular micro-swizzle mode if available.
    ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR3_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->numSamples > 1)
    {
        // TODO: MSAA
        returnCode = ADDR_NOTIMPLEMENTED;
    }
    else if (pIn->numMipLevels > MaxMipLevels)
    {
        // The assert above vanishes in release builds; mipInfo[] only has room for MaxMipLevels entries.
        returnCode = ADDR_INVALIDPARAMS;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    if (returnCode == ADDR_OK)
    {
        // Validate every region before touching the surface so a bad entry cannot leave it partially updated.
        // mipId indexes the fixed-size mipInfo[] array below.
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            if (pRegions[regionIdx].mipId >= MaxMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }
        }
    }

    LutAddresser            addresser        = LutAddresser();
    UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;

    if (returnCode == ADDR_OK)
    {
        const UINT_32          blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_SW_PATINFO* pPatInfo    = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                   Log2(pIn->bpp >> 3),
                                                                   pIn->numSamples);

        if (pPatInfo == nullptr)
        {
            // No pattern was returned for this swizzle mode / element size; bail out instead of dereferencing it.
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
            GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
            addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);

            pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
            if (pfnCopyUnaligned == nullptr)
            {
                ADDR_ASSERT_ALWAYS(); // What format is this?
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
            const ADDR3_MIP_INFO*               pMipInfo   = &mipInfo[pCurRegion->mipId];

            // When the mapping already points at the subresource, no per-mip offset is applied.
            UINT_64 mipOffset  = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
            UINT_32 yBlks      = pMipInfo->pitch / localOut.blockExtent.width;

            UINT_32 xStart     = pCurRegion->x     + pMipInfo->mipTailCoordX;
            UINT_32 yStart     = pCurRegion->y     + pMipInfo->mipTailCoordY;
            UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;

            for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
            {
                // The copy functions take the base address of the hardware slice, not the logical slice. Those are
                // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
                // for unaligned copies.
                UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
                UINT_32 sliceXor      = pIn->pbXor ^ addresser.GetAddressZ(slice);

                // NOTE(review): slice starts at pCurRegion->slice + mipTailCoordZ, so the first memory offset is
                // mipTailCoordZ * memSlicePitch rather than 0 whenever mipTailCoordZ is non-zero -- confirm intended.
                UINT_64 memOffset     = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
                UINT_64 imgOffset     = mipOffset + (sliceBlkStart * localOut.sliceSize);

                ADDR_COORD2D  sliceOrigin = { xStart, yStart };
                ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };

                pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
                                 VoidPtrInc(pCurRegion->pMem, memOffset),
                                 pCurRegion->memRowPitch,
                                 yBlks,
                                 sliceOrigin,
                                 sliceExtent,
                                 sliceXor,
                                 addresser);
            }
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
*   Gfx12Lib::HwlCopySurfaceToMem
*
*   @brief
*       Copy multiple regions from a non-linear surface to memory.
*
*   @return
*       ADDR_OK on success. ADDR_NOTIMPLEMENTED when pIn->numSamples > 1 (MSAA is not handled yet).
*       ADDR_INVALIDPARAMS when the mip count or a region's mipId does not fit in MaxMipLevels, or when no swizzle
*       pattern / copy routine is available for the surface. Errors from ComputeSurfaceInfo are passed through.
*       No region is copied unless every check has passed.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx12Lib::HwlCopySurfaceToMem(
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,          ///< [in] Description of the mapped surface
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,     ///< [in] Regions to copy
    UINT_32                             regionCount   ///< [in] Number of entries in pRegions
    ) const
{
    // Copy tiled surface to memory. We will use the 'swizzler' object to dispatch to a version of the copy routine
    // optimized for a particular micro-swizzle mode if available.
    ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR3_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->numSamples > 1)
    {
        // TODO: MSAA
        returnCode = ADDR_NOTIMPLEMENTED;
    }
    else if (pIn->numMipLevels > MaxMipLevels)
    {
        // The assert above vanishes in release builds; mipInfo[] only has room for MaxMipLevels entries.
        returnCode = ADDR_INVALIDPARAMS;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    if (returnCode == ADDR_OK)
    {
        // Validate every region before writing to client memory so a bad entry cannot leave a partial result.
        // mipId indexes the fixed-size mipInfo[] array below.
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            if (pRegions[regionIdx].mipId >= MaxMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }
        }
    }

    LutAddresser            addresser        = LutAddresser();
    UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;

    if (returnCode == ADDR_OK)
    {
        const UINT_32          blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_SW_PATINFO* pPatInfo    = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                   Log2(pIn->bpp >> 3),
                                                                   pIn->numSamples);

        if (pPatInfo == nullptr)
        {
            // No pattern was returned for this swizzle mode / element size; bail out instead of dereferencing it.
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
            GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
            addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);

            // Same addresser as the memory -> surface path, but with the image -> memory copy routine.
            pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
            if (pfnCopyUnaligned == nullptr)
            {
                ADDR_ASSERT_ALWAYS(); // What format is this?
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
            const ADDR3_MIP_INFO*               pMipInfo   = &mipInfo[pCurRegion->mipId];

            // When the mapping already points at the subresource, no per-mip offset is applied.
            UINT_64 mipOffset  = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
            UINT_32 yBlks      = pMipInfo->pitch / localOut.blockExtent.width;

            UINT_32 xStart     = pCurRegion->x     + pMipInfo->mipTailCoordX;
            UINT_32 yStart     = pCurRegion->y     + pMipInfo->mipTailCoordY;
            UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;

            for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
            {
                // The copy functions take the base address of the hardware slice, not the logical slice. Those are
                // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
                // for unaligned copies.
                UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
                UINT_32 sliceXor      = pIn->pbXor ^ addresser.GetAddressZ(slice);

                // NOTE(review): slice starts at pCurRegion->slice + mipTailCoordZ, so the first memory offset is
                // mipTailCoordZ * memSlicePitch rather than 0 whenever mipTailCoordZ is non-zero -- confirm intended.
                UINT_64 memOffset     = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
                UINT_64 imgOffset     = mipOffset + (sliceBlkStart * localOut.sliceSize);

                ADDR_COORD2D  sliceOrigin = { xStart, yStart };
                ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };

                pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
                                 VoidPtrInc(pCurRegion->pMem, memOffset),
                                 pCurRegion->memRowPitch,
                                 yBlks,
                                 sliceOrigin,
                                 sliceExtent,
                                 sliceXor,
                                 addresser);
            }
        }
    }

    return returnCode;
}
/**
************************************************************************************************************************
* Gfx12Lib::HwlComputePipeBankXor
@ -1768,43 +1993,36 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlGetPossibleSwizzleModes(
pOut->validModes.sw2d64kB = 1;
pOut->validModes.sw2d256kB = 1;
}
// Block-compressed images need to be either using 2D or linear swizzle modes.
else if (flags.blockCompressed)
// Some APIs (like Vulkan) require that PRT should always use 64KB blocks
else if (flags.standardPrt)
{
pOut->validModes.swLinear = 1;
// We find cases where Tex3d BlockCompressed image adopts 2D_256B should be prohibited.
if (IsTex3d(pIn->resourceType) == FALSE)
if (IsTex3d(pIn->resourceType) && (flags.view3dAs2dArray == 0))
{
pOut->validModes.sw2d256B = 1;
pOut->validModes.sw3d64kB = 1;
}
else
{
pOut->validModes.sw2d64kB = 1;
}
pOut->validModes.sw2d4kB = 1;
pOut->validModes.sw2d64kB = 1;
pOut->validModes.sw2d256kB = 1;
}
else if (IsTex1d(pIn->resourceType))
else if (// Block-compressed images need to be either using 2D or linear swizzle modes.
flags.blockCompressed ||
// Only 3D w/ view3dAs2dArray == 0 will use 1D/2D block swizzle modes
(IsTex3d(pIn->resourceType) == FALSE) || flags.view3dAs2dArray ||
// NV12 and P010 support
// SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
// There could be more multimedia formats that require more hw specific tiling modes...
flags.nv12 || flags.p010)
{
pOut->validModes.swLinear = 1;
pOut->validModes.sw2d256B = 1;
pOut->validModes.sw2d4kB = 1;
pOut->validModes.sw2d64kB = 1;
pOut->validModes.sw2d256kB = 1;
}
else if (flags.nv12 || flags.p010 || IsTex2d(pIn->resourceType) || flags.view3dAs2dArray)
{
// NV12 and P010 support
// SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
// There could be more multimedia formats that require more hw specific tiling modes...
// The exception is VRS images.
// Linear is not allowed for VRS images.
if (flags.isVrsImage == 0)
{
pOut->validModes.swLinear = 1;
}
if (flags.view3dAs2dArray == 0)
// 3D resources can't use SW_256B_2D
if (IsTex3d(pIn->resourceType) == FALSE)
{
// ADDR3_256B_2D can't support 3D images.
pOut->validModes.sw2d256B = 1;
}
pOut->validModes.sw2d4kB = 1;

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -162,6 +162,16 @@ private:
const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Copies regionCount regions (pRegions) from client memory into the mapped non-linear surface described by pIn.
// The Gfx12 implementation returns ADDR_NOTIMPLEMENTED when pIn->numSamples > 1.
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const override;
// Copies regionCount regions (pRegions) from the mapped non-linear surface described by pIn into client memory.
// The Gfx12 implementation returns ADDR_NOTIMPLEMENTED when pIn->numSamples > 1.
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
UINT_32 regionCount) const override;
UINT_32 m_numSwizzleBits;
// Initialize equation table

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -3834,7 +3834,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
}
// Select the biggest allowed block type
minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
minSizeBlk = Log2(allowedBlockSet.value) + 1;
if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
{
@ -3960,7 +3960,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
// type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
}
returnCode = ADDR_OK;

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

View file

@ -1,7 +1,7 @@
/*
************************************************************************************************************************
*
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
* SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/
@ -68,11 +68,10 @@ struct SiChipSettings
UINT_32 isPolaris10 : 1;
UINT_32 isPolaris11 : 1;
UINT_32 isPolaris12 : 1;
// VI fusion
UINT_32 isVegaM : 1;
UINT_32 isCarrizo : 1;
UINT_32 : 2;
UINT_32 : 1;
};
/**

View file

@ -3205,7 +3205,6 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo
struct radeon_surf *surf)
{
bool compressed = surf->blk_w == 4 && surf->blk_h == 4;
bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
bool stencil_only = (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
ADDR3_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
@ -3220,13 +3219,11 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo
AddrSurfInfoIn.bpp = surf->bpe * 8;
}
AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
AddrSurfInfoIn.flags.depth = !!(surf->flags & RADEON_SURF_ZBUFFER);
AddrSurfInfoIn.flags.stencil = stencil_only;
AddrSurfInfoIn.flags.texture = !(surf->flags & RADEON_SURF_NO_TEXTURE);
AddrSurfInfoIn.flags.unordered = !(surf->flags & RADEON_SURF_NO_TEXTURE);
AddrSurfInfoIn.flags.blockCompressed = compressed;
AddrSurfInfoIn.flags.isVrsImage = !!(surf->flags & RADEON_SURF_VRS_RATE);
AddrSurfInfoIn.flags.standardPrt = !!(surf->flags & RADEON_SURF_PRT);
if (config->is_3d)
AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
@ -3254,11 +3251,6 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo
AddrSurfInfoIn.swizzleMode = ac_get_modifier_swizzle_mode(info->gfx_level, surf->modifier);
} else if (surf->flags & RADEON_SURF_IMPORTED) {
AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
} else if (surf->flags & RADEON_SURF_PRT) {
if (config->is_3d)
AddrSurfInfoIn.swizzleMode = ADDR3_64KB_3D;
else
AddrSurfInfoIn.swizzleMode = ADDR3_64KB_2D;
} else if (mode == RADEON_SURF_MODE_LINEAR_ALIGNED) {
assert(config->info.samples <= 1 && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
AddrSurfInfoIn.swizzleMode = ADDR3_LINEAR;

View file

@ -221,8 +221,6 @@ static void gfx12_generate_hash(struct ac_addrlib *ac_addrlib,
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input = {0};
input.size = sizeof(input);
input.swizzleMode = surf->u.gfx9.swizzle_mode;
input.flags.color = 1;
input.flags.texture = 1;
input.resourceType = ADDR_RSRC_TEX_2D;
input.bpp = util_format_get_blocksizebits(entry->format);
input.unAlignedDims.width = entry->w;