mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
amd: update addrlib
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32687>
This commit is contained in:
parent
33a73203b0
commit
c0e5e8f932
37 changed files with 4957 additions and 2578 deletions
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -23,8 +23,8 @@ extern "C"
|
|||
{
|
||||
#endif
|
||||
|
||||
#define ADDRLIB_VERSION_MAJOR 9
|
||||
#define ADDRLIB_VERSION_MINOR 11
|
||||
#define ADDRLIB_VERSION_MAJOR 10
|
||||
#define ADDRLIB_VERSION_MINOR 1
|
||||
/// Packs a version pair into one value: "major" shifted left by 16 bits, OR-ed with "minor".
/// Both arguments are parenthesized so that expression arguments (e.g. "a | b") keep their
/// own value instead of being re-associated with the shift and OR of the expansion.
#define ADDRLIB_MAKE_VERSION(major, minor) (((major) << 16) | (minor))
|
||||
#define ADDRLIB_VERSION ADDRLIB_MAKE_VERSION(ADDRLIB_VERSION_MAJOR, ADDRLIB_VERSION_MINOR)
|
||||
|
||||
|
|
@ -34,6 +34,25 @@ typedef VOID* ADDR_HANDLE;
|
|||
/// Client handle used in callbacks
|
||||
typedef VOID* ADDR_CLIENT_HANDLE;
|
||||
|
||||
typedef struct _ADDR_COORD2D
|
||||
{
|
||||
UINT_32 x;
|
||||
UINT_32 y;
|
||||
} ADDR_COORD2D;
|
||||
|
||||
typedef struct _ADDR_COORD3D
|
||||
{
|
||||
UINT_32 x;
|
||||
UINT_32 y;
|
||||
UINT_32 z; // also slices for 2D images
|
||||
} ADDR_COORD3D;
|
||||
|
||||
typedef struct _ADDR_EXTENT2D
|
||||
{
|
||||
UINT_32 width;
|
||||
UINT_32 height;
|
||||
} ADDR_EXTENT2D;
|
||||
|
||||
typedef struct _ADDR_EXTENT3D
|
||||
{
|
||||
UINT_32 width;
|
||||
|
|
@ -1525,6 +1544,16 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
|
|||
*/
|
||||
UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* AddrGetInterfaceVersion
|
||||
*
|
||||
* @brief
|
||||
* Get AddrLib interface version number (eg. Addr2 = 2)
|
||||
****************************************************************************************************
|
||||
*/
|
||||
UINT_32 ADDR_API AddrGetInterfaceVersion(ADDR_HANDLE hLib);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* AddrUseTileIndex
|
||||
|
|
@ -2637,6 +2666,89 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord(
|
|||
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR2_COPY_MEMSURFACE_REGION
|
||||
*
|
||||
* @brief
|
||||
* Input structure for Addr2CopyMemToSurface and Addr2CopySurfaceToMem
|
||||
****************************************************************************************************
|
||||
*/
|
||||
typedef struct _ADDR2_COPY_MEMSURFACE_REGION
|
||||
{
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
|
||||
UINT_32 x; ///< Starting X coordinate, in elements
|
||||
UINT_32 y; ///< Starting Y coordinate, in elements
|
||||
UINT_32 slice; ///< Starting slice index or Z coordinate, in elements
|
||||
UINT_32 mipId; ///< The mip ID in mip chain
|
||||
ADDR_EXTENT3D copyDims; ///< Size of the region to copy, in elements
|
||||
|
||||
void* pMem; ///< Pointer to memory to copy
|
||||
UINT_64 memRowPitch; ///< Pitch between rows in bytes
|
||||
UINT_64 memSlicePitch; ///< Pitch between array/depth slices in bytes
|
||||
} ADDR2_COPY_MEMSURFACE_REGION;
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR2_COPY_MEMSURFACE_INPUT
|
||||
*
|
||||
* @brief
|
||||
* Input structure for Addr2CopyMemToSurface and Addr2CopySurfaceToMem
|
||||
****************************************************************************************************
|
||||
*/
|
||||
typedef struct _ADDR2_COPY_MEMSURFACE_INPUT
|
||||
{
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
|
||||
AddrSwizzleMode swizzleMode; ///< Swizzle mode
|
||||
AddrFormat format; ///< Format
|
||||
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
|
||||
AddrResourceType resourceType; ///< Surface type
|
||||
UINT_32 bpp; ///< Bits per pixel
|
||||
ADDR_EXTENT3D unAlignedDims; ///< Surface original dimensions (of mip0), in pixels
|
||||
UINT_32 numMipLevels; ///< Total mipmap levels
|
||||
UINT_32 numSamples; ///< Number of samples
|
||||
UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats)
|
||||
UINT_32 pbXor; ///< Xor value
|
||||
|
||||
void* pMappedSurface; ///< Pointer to the image surface, mapped to CPU memory
|
||||
BOOL_32 singleSubres; ///< Pointer is to the base of the subresource, not to the
|
||||
/// base of the surface image data. Requires:
|
||||
/// - copyDims.depth == 1
|
||||
/// - all copy regions target the same mip
|
||||
/// - all copy regions target the same slice/depth
|
||||
} ADDR2_COPY_MEMSURFACE_INPUT;
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr2CopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from memory to an uncompressed CPU-mapped surface
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr2CopyMemToSurface(
|
||||
ADDR_HANDLE hLib,
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount
|
||||
);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr2CopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from an uncompressed CPU-mapped surface to memory
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr2CopySurfaceToMem(
|
||||
ADDR_HANDLE hLib,
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -3724,7 +3836,7 @@ typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT
|
|||
AddrResourceType resourceType; ///< Surface type
|
||||
AddrFormat format; ///< Surface format
|
||||
UINT_32 width; ///< Width of mip0 in texels (not in compressed block)
|
||||
UINT_32 height; ///< Height of mip0 in texels (not in compressed block)
|
||||
UINT_32 height; ///< Height of mip0 in texels (not in compressed block)
|
||||
UINT_32 numSlices; ///< Number surface slice/depth of mip0
|
||||
UINT_32 numMipLevels; ///< Total mipmap levels.
|
||||
UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation
|
||||
|
|
@ -3977,6 +4089,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2GetPossibleSwizzleModes(
|
|||
*
|
||||
* @brief
|
||||
* Return whether the swizzle mode is supported by display engine
|
||||
* pResult: whether the given displaySwizzleMode is displayable or not
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
|
||||
|
|
@ -4056,23 +4169,22 @@ typedef union _ADDR3_SURFACE_FLAGS
|
|||
{
|
||||
struct
|
||||
{
|
||||
UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV
|
||||
UINT_32 depth : 1; ///< This resource is a depth buffer, can be used with DSV
|
||||
UINT_32 stencil : 1; ///< This resource is a stencil buffer, can be used with DSV
|
||||
UINT_32 texture : 1; ///< This resource can be used with SRV
|
||||
UINT_32 unordered : 1; ///< This resource can be used with UAV
|
||||
UINT_32 hiZHiS : 1;
|
||||
UINT_32 blockCompressed : 1;
|
||||
UINT_32 nv12 : 1;
|
||||
UINT_32 p010 : 1;
|
||||
UINT_32 view3dAs2dArray : 1;
|
||||
UINT_32 isVrsImage : 1; ///< This resource is a VRS source image
|
||||
UINT_32 standardPrt : 1; ///< This resource is a PRT resource with the specific block
|
||||
/// dimensions that some APIs want
|
||||
UINT_32 reserved1 : 2;
|
||||
UINT_32 denseSliceExact : 1; ///< Pad dimensions such that
|
||||
/// Pow2Align(pitch*height, surfAlign)==pitch*height
|
||||
UINT_32 qbStereo : 1; ///< Quad buffer stereo surface
|
||||
UINT_32 display : 1; ///< This resource is displayable, can be used with DRV
|
||||
UINT_32 reserved : 16; ///< Reserved bits
|
||||
UINT_32 reserved : 18; ///< Reserved bits
|
||||
};
|
||||
|
||||
UINT_32 value;
|
||||
|
|
@ -4323,6 +4435,91 @@ ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord(
|
|||
const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut);
|
||||
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR3_COPY_MEMSURFACE_REGION
|
||||
*
|
||||
* @brief
|
||||
* Input structure for Addr3CopyMemToSurface and Addr3CopySurfaceToMem
|
||||
****************************************************************************************************
|
||||
*/
|
||||
typedef struct _ADDR3_COPY_MEMSURFACE_REGION
|
||||
{
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
|
||||
UINT_32 x; ///< Starting X coordinate, in elements
|
||||
UINT_32 y; ///< Starting Y coordinate, in elements
|
||||
UINT_32 slice; ///< Starting slice index or Z coordinate, in elements
|
||||
UINT_32 mipId; ///< The mip ID in mip chain
|
||||
ADDR_EXTENT3D copyDims; ///< Size of the region to copy, in elements
|
||||
|
||||
void* pMem; ///< Pointer to memory to copy
|
||||
UINT_64 memRowPitch; ///< Pitch between rows in bytes
|
||||
UINT_64 memSlicePitch; ///< Pitch between array/depth slices in bytes
|
||||
} ADDR3_COPY_MEMSURFACE_REGION;
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR3_COPY_MEMSURFACE_INPUT
|
||||
*
|
||||
* @brief
|
||||
* Input structure for Addr3CopyMemToSurface and Addr3CopySurfaceToMem
|
||||
****************************************************************************************************
|
||||
*/
|
||||
typedef struct _ADDR3_COPY_MEMSURFACE_INPUT
|
||||
{
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
|
||||
Addr3SwizzleMode swizzleMode; ///< Swizzle mode for Gfx12
|
||||
ADDR3_SURFACE_FLAGS flags; ///< Surface flags
|
||||
AddrFormat format; ///< Format
|
||||
AddrResourceType resourceType; ///< Surface type
|
||||
UINT_32 bpp; ///< Bits per pixel
|
||||
ADDR_EXTENT3D unAlignedDims; ///< Surface original dimensions (of mip0), in pixels
|
||||
UINT_32 numMipLevels; ///< Total mipmap levels
|
||||
UINT_32 numSamples; ///< Number of samples
|
||||
UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats)
|
||||
UINT_32 pbXor; ///< Xor value
|
||||
|
||||
void* pMappedSurface; ///< Pointer to the image surface, mapped to CPU memory
|
||||
BOOL_32 singleSubres; ///< Pointer is to the base of the subresource, not to the
|
||||
/// base of the surface image data. Requires:
|
||||
/// - copyDims.depth == 1
|
||||
/// - all copy regions target the same mip
|
||||
/// - all copy regions target the same slice/depth
|
||||
} ADDR3_COPY_MEMSURFACE_INPUT;
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr3CopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from memory to an uncompressed CPU-mapped surface
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr3CopyMemToSurface(
|
||||
ADDR_HANDLE hLib,
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount
|
||||
);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr3CopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from an uncompressed CPU-mapped surface to memory
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr3CopySurfaceToMem(
|
||||
ADDR_HANDLE hLib,
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount
|
||||
);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR3_COMPUTE_PIPEBANKXOR_INPUT
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@ files_addrlib = files(
|
|||
'src/core/addrlib3.h',
|
||||
'src/core/addrobject.cpp',
|
||||
'src/core/addrobject.h',
|
||||
'src/core/addrswizzler.cpp',
|
||||
'src/core/addrswizzler.h',
|
||||
'src/core/coord.cpp',
|
||||
'src/core/coord.h',
|
||||
'src/gfx9/gfx9addrlib.cpp',
|
||||
|
|
@ -54,6 +56,12 @@ else
|
|||
cpp_args_addrlib += '-DBIGENDIAN_CPU'
|
||||
endif
|
||||
|
||||
# Define DEBUG explicitly for addrlib: 0 when with_mesa_ndebug is set, 1 otherwise.
cpp_args_addrlib += with_mesa_ndebug ? '-DDEBUG=0' : '-DDEBUG=1'
|
||||
|
||||
cpp_args_addrlib += cpp.get_supported_arguments(
|
||||
['-Wno-unused-variable', '-Wno-unused-local-typedefs',
|
||||
'-Wno-unused-but-set-variable', '-Wno-maybe-uninitialized',
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -573,6 +573,32 @@ UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib)
|
|||
return version;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* AddrGetInterfaceVersion
|
||||
*
|
||||
* @brief
|
||||
* Get AddrLib interface version number. Client may use this to know what AddrN functions to
|
||||
* use.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
UINT_32 ADDR_API AddrGetInterfaceVersion(ADDR_HANDLE hLib)
|
||||
{
|
||||
UINT_32 version = 0;
|
||||
|
||||
Addr::Lib* pLib = Lib::GetLib(hLib);
|
||||
|
||||
ADDR_ASSERT(pLib != NULL);
|
||||
|
||||
if (pLib)
|
||||
{
|
||||
version = pLib->GetInterfaceVersion();
|
||||
}
|
||||
|
||||
ADDR_RESET_DEBUG_PRINTERS();
|
||||
return version;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* AddrUseTileIndex
|
||||
|
|
@ -1219,6 +1245,72 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord(
|
|||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr2CopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from memory to an uncompressed CPU-mapped surface
|
||||
*
|
||||
* @return
|
||||
* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr2CopyMemToSurface(
|
||||
ADDR_HANDLE hLib, ///< address lib handle
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn, ///< [in] description of image and mapping
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions, ///< [in] list of copy regions
|
||||
UINT_32 regionCount) ///< [in] count of copy regions in list
|
||||
{
|
||||
V2::Lib* pLib = V2::Lib::GetLib(hLib);
|
||||
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (pLib != NULL)
|
||||
{
|
||||
returnCode = pLib->CopyMemToSurface(pIn, pRegions, regionCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = ADDR_ERROR;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr2CopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from an uncompressed CPU-mapped surface to memory
|
||||
*
|
||||
* @return
|
||||
* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr2CopySurfaceToMem(
|
||||
ADDR_HANDLE hLib, ///< address lib handle
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn, ///< [in] description of image and mapping
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions, ///< [in] list of copy regions
|
||||
UINT_32 regionCount) ///< [in] count of copy regions in list
|
||||
{
|
||||
V2::Lib* pLib = V2::Lib::GetLib(hLib);
|
||||
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (pLib != NULL)
|
||||
{
|
||||
returnCode = pLib->CopySurfaceToMem(pIn, pRegions, regionCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = ADDR_ERROR;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
|
|
@ -1822,7 +1914,12 @@ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
|
|||
in.swizzleMode = swizzleMode;
|
||||
in.bpp = bpp;
|
||||
|
||||
*pResult = pLib->IsValidDisplaySwizzleMode(&in);
|
||||
BOOL_32 result = pLib->IsValidDisplaySwizzleMode(&in);
|
||||
if (pResult != NULL)
|
||||
{
|
||||
*pResult = result;
|
||||
}
|
||||
|
||||
returnCode = ADDR_OK;
|
||||
}
|
||||
else
|
||||
|
|
@ -2105,6 +2202,72 @@ ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord(
|
|||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr3CopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from memory to an uncompressed CPU-mapped surface
|
||||
*
|
||||
* @return
|
||||
* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr3CopyMemToSurface(
|
||||
ADDR_HANDLE hLib, ///< address lib handle
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn, ///< [in] description of image and mapping
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions, ///< [in] list of copy regions
|
||||
UINT_32 regionCount) ///< [in] count of copy regions in list
|
||||
{
|
||||
V3::Lib* pLib = V3::Lib::GetLib(hLib);
|
||||
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (pLib != NULL)
|
||||
{
|
||||
returnCode = pLib->CopyMemToSurface(pIn, pRegions, regionCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = ADDR_ERROR;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr3CopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy an image region from an uncompressed CPU-mapped surface to memory
|
||||
*
|
||||
* @return
|
||||
* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API Addr3CopySurfaceToMem(
|
||||
ADDR_HANDLE hLib, ///< address lib handle
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn, ///< [in] description of image and mapping
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions, ///< [in] list of copy regions
|
||||
UINT_32 regionCount) ///< [in] count of copy regions in list
|
||||
{
|
||||
V3::Lib* pLib = V3::Lib::GetLib(hLib);
|
||||
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (pLib != NULL)
|
||||
{
|
||||
returnCode = pLib->CopySurfaceToMem(pIn, pRegions, regionCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = ADDR_ERROR;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Addr3ComputePipeBankXor
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -81,7 +81,7 @@
|
|||
|
||||
#define AMDGPU_VEGA10_RANGE 0x01, 0x14 //# 1 <= x < 20
|
||||
#define AMDGPU_VEGA12_RANGE 0x14, 0x28 //# 20 <= x < 40
|
||||
#define AMDGPU_VEGA20_RANGE 0x28, 0x32 //# 40 <= x < max
|
||||
#define AMDGPU_VEGA20_RANGE 0x28, 0xFF //# 40 <= x < max
|
||||
|
||||
#define AMDGPU_RAVEN_RANGE 0x01, 0x81 //# 1 <= x < 129
|
||||
#define AMDGPU_RAVEN2_RANGE 0x81, 0x90 //# 129 <= x < 144
|
||||
|
|
@ -183,8 +183,7 @@
|
|||
#define ASICREV_IS_GFX1151(r) ASICREV_IS(r, GFX1151)
|
||||
#define ASICREV_IS_GFX1152(r) ASICREV_IS(r, GFX1152)
|
||||
#define ASICREV_IS_GFX1153(r) ASICREV_IS(r, GFX1153)
|
||||
|
||||
#define ASICREV_IS_PHOENIX1(r) ASICREV_IS(r, PHOENIX1)
|
||||
#define ASICREV_IS_PHOENIX(r) ASICREV_IS(r, PHOENIX)
|
||||
#define ASICREV_IS_PHOENIX2(r) ASICREV_IS(r, PHOENIX2)
|
||||
#define ASICREV_IS_HAWK_POINT1(r) ASICREV_IS(r, HAWK_POINT1)
|
||||
#define ASICREV_IS_HAWK_POINT2(r) ASICREV_IS(r, HAWK_POINT2)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -17,6 +17,7 @@
|
|||
#define __ADDR_COMMON_H__
|
||||
|
||||
#include "addrinterface.h"
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#if !defined(__APPLE__) || defined(HAVE_TSERVER)
|
||||
|
|
@ -26,7 +27,6 @@
|
|||
|
||||
#if defined(__GNUC__)
|
||||
#include <signal.h>
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
|
@ -36,17 +36,9 @@
|
|||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Platform specific debug break defines
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#if !defined(DEBUG)
|
||||
#ifdef NDEBUG
|
||||
#define DEBUG 0
|
||||
#else
|
||||
#define DEBUG 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if DEBUG
|
||||
#if defined(__GNUC__)
|
||||
#define ADDR_DBG_BREAK() { assert(false); }
|
||||
#define ADDR_DBG_BREAK() { raise(SIGTRAP); }
|
||||
#elif defined(__APPLE__)
|
||||
#define ADDR_DBG_BREAK() { IOPanic("");}
|
||||
#else
|
||||
|
|
@ -191,21 +183,21 @@ do { if (!(cond)) \
|
|||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#if defined(static_assert)
|
||||
#if 1
|
||||
#define ADDR_C_ASSERT(__e) static_assert(__e, "")
|
||||
#else
|
||||
/* This version of STATIC_ASSERT() relies on VLAs. If COND is
|
||||
* false/zero, the array size will be -1 and we'll get a compile
|
||||
* error
|
||||
*/
|
||||
# define ADDR_C_ASSERT(__e) do { \
|
||||
(void) sizeof(char [1 - 2*!(__e)]); \
|
||||
} while (0)
|
||||
#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1]
|
||||
#endif
|
||||
|
||||
namespace Addr
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Common constants
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
static const UINT_32 MaxElementBytesLog2 = 5; ///< Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
|
||||
|
||||
|
||||
namespace V1
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -382,6 +374,36 @@ static inline UINT_32 BitScanForward(
|
|||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* BitScanReverse
|
||||
*
|
||||
* @brief
|
||||
* Returns the reverse-position of the most-significant '1' bit. Must not be 0.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static inline UINT_32 BitScanReverse(
|
||||
UINT_32 mask) ///< [in] Bitmask to scan
|
||||
{
|
||||
ADDR_ASSERT(mask > 0);
|
||||
unsigned long out = 0;
|
||||
#if (defined(_WIN32) || defined(_WIN64))
|
||||
::_BitScanReverse(&out, mask);
|
||||
out ^= 31;
|
||||
#elif defined(__GNUC__)
|
||||
out = __builtin_clz(mask);
|
||||
#else
|
||||
out = 32;
|
||||
while (mask != 0)
|
||||
{
|
||||
mask >>= 1;
|
||||
out++;
|
||||
}
|
||||
out = sizeof(mask) * 8 - out;
|
||||
#endif
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* IsPow2
|
||||
|
|
@ -414,10 +436,10 @@ static inline UINT_64 IsPow2(
|
|||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ByteAlign
|
||||
* PowTwoAlign
|
||||
*
|
||||
* @brief
|
||||
* Align UINT_32 "x" to "align" alignment, "align" should be power of 2
|
||||
* Align UINT_32 "x" up to "align" alignment, "align" should be power of 2
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static inline UINT_32 PowTwoAlign(
|
||||
|
|
@ -433,10 +455,10 @@ static inline UINT_32 PowTwoAlign(
|
|||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ByteAlign
|
||||
* PowTwoAlign
|
||||
*
|
||||
* @brief
|
||||
* Align UINT_64 "x" to "align" alignment, "align" should be power of 2
|
||||
* Align UINT_64 "x" up to "align" alignment, "align" should be power of 2
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static inline UINT_64 PowTwoAlign(
|
||||
|
|
@ -450,6 +472,44 @@ static inline UINT_64 PowTwoAlign(
|
|||
return (x + (align - 1)) & (~(align - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* PowTwoAlignDown
|
||||
*
|
||||
* @brief
|
||||
* Align UINT_32 "x" down to "align" alignment, "align" should be power of 2
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static inline UINT_32 PowTwoAlignDown(
|
||||
UINT_32 x,
|
||||
UINT_32 align)
|
||||
{
|
||||
//
|
||||
// Assert that x is a power of two.
|
||||
//
|
||||
ADDR_ASSERT(IsPow2(align));
|
||||
return (x & ~(align - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* PowTwoAlignDown
|
||||
*
|
||||
* @brief
|
||||
* Align UINT_64 "x" down to "align" alignment, "align" should be power of 2
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static inline UINT_64 PowTwoAlignDown(
|
||||
UINT_64 x,
|
||||
UINT_64 align)
|
||||
{
|
||||
//
|
||||
// Assert that x is a power of two.
|
||||
//
|
||||
ADDR_ASSERT(IsPow2(align));
|
||||
return (x & ~(align - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Min
|
||||
|
|
@ -571,44 +631,18 @@ static inline UINT_32 NextPow2(
|
|||
return newDim;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Log2NonPow2
|
||||
*
|
||||
* @brief
|
||||
* Compute log of base 2 no matter the target is power of 2 or not
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static inline UINT_32 Log2NonPow2(
|
||||
UINT_32 x) ///< [in] the value should calculate log based 2
|
||||
{
|
||||
UINT_32 y;
|
||||
|
||||
y = 0;
|
||||
while (x > 1)
|
||||
{
|
||||
x >>= 1;
|
||||
y++;
|
||||
}
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Log2
|
||||
*
|
||||
* @brief
|
||||
* Compute log of base 2
|
||||
* Compute log of base 2 no matter the target is power of 2 or not. Returns 0 if 0.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static inline UINT_32 Log2(
|
||||
UINT_32 x) ///< [in] the value should calculate log based 2
|
||||
{
|
||||
// Assert that x is a power of two.
|
||||
ADDR_ASSERT(IsPow2(x));
|
||||
|
||||
return Log2NonPow2(x);
|
||||
return (x != 0) ? (31 ^ BitScanReverse(x)) : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1081,6 +1115,72 @@ static inline UINT_32 ShiftRight(
|
|||
return Max(a >> b, 1u);
|
||||
}
|
||||
|
||||
/**
****************************************************************************************************
*   VoidPtrDec
*
*   @brief
*       Subtracts a byte offset from the given pointer directly.
****************************************************************************************************
*/
static inline void* VoidPtrDec(
    void*  pIn,
    size_t offset)
{
    // Step in units of char so that "offset" is applied as a byte count.
    char* const pBytes = static_cast<char*>(pIn);

    return pBytes - offset;
}

static inline const void* VoidPtrDec(
    const void* pIn,
    size_t      offset)
{
    const char* const pBytes = static_cast<const char*>(pIn);

    return pBytes - offset;
}
|
||||
|
||||
/**
****************************************************************************************************
*   VoidPtrInc
*
*   @brief
*       Adds a byte offset to the given pointer.
****************************************************************************************************
*/
static inline void* VoidPtrInc(
    void*  pIn,
    size_t offset)
{
    char* const pBytes = static_cast<char*>(pIn);

    return pBytes + offset;
}

static inline const void* VoidPtrInc(
    const void* pIn,
    size_t      offset)
{
    const char* const pBytes = static_cast<const char*>(pIn);

    return pBytes + offset;
}
|
||||
|
||||
/**
****************************************************************************************************
*   VoidPtrXor
*
*   @brief
*       XORs a value into the integer representation of the given pointer.
****************************************************************************************************
*/
static inline void* VoidPtrXor(
    void*  pIn,
    size_t offset)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(pIn);

    return reinterpret_cast<void*>(address ^ offset);
}

static inline const void* VoidPtrXor(
    const void* pIn,
    size_t      offset)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(pIn);

    return reinterpret_cast<const void*>(address ^ offset);
}
|
||||
|
||||
} // Addr
|
||||
|
||||
#endif // __ADDR_COMMON_H__
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -668,4 +668,105 @@ UINT_32 Lib::GetBpe(AddrFormat format) const
|
|||
return GetElemLib()->GetBitsPerPixel(format);
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::ComputeOffsetFromSwizzlePattern
|
||||
*
|
||||
* @brief
|
||||
* Compute offset from swizzle pattern
|
||||
*
|
||||
* @return
|
||||
* Offset
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
UINT_32 Lib::ComputeOffsetFromSwizzlePattern(
|
||||
const UINT_64* pPattern, ///< Swizzle pattern
|
||||
UINT_32 numBits, ///< Number of bits in pattern
|
||||
UINT_32 x, ///< x coord in pixel
|
||||
UINT_32 y, ///< y coord in pixel
|
||||
UINT_32 z, ///< z coord in slice
|
||||
UINT_32 s ///< sample id
|
||||
)
|
||||
{
|
||||
UINT_32 offset = 0;
|
||||
const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
|
||||
|
||||
for (UINT_32 i = 0; i < numBits; i++)
|
||||
{
|
||||
UINT_32 v = 0;
|
||||
|
||||
if (pSwizzlePattern[i].x != 0)
|
||||
{
|
||||
UINT_16 mask = pSwizzlePattern[i].x;
|
||||
UINT_32 xBits = x;
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
if (mask & 1)
|
||||
{
|
||||
v ^= xBits & 1;
|
||||
}
|
||||
|
||||
xBits >>= 1;
|
||||
mask >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (pSwizzlePattern[i].y != 0)
|
||||
{
|
||||
UINT_16 mask = pSwizzlePattern[i].y;
|
||||
UINT_32 yBits = y;
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
if (mask & 1)
|
||||
{
|
||||
v ^= yBits & 1;
|
||||
}
|
||||
|
||||
yBits >>= 1;
|
||||
mask >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (pSwizzlePattern[i].z != 0)
|
||||
{
|
||||
UINT_16 mask = pSwizzlePattern[i].z;
|
||||
UINT_32 zBits = z;
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
if (mask & 1)
|
||||
{
|
||||
v ^= zBits & 1;
|
||||
}
|
||||
|
||||
zBits >>= 1;
|
||||
mask >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (pSwizzlePattern[i].s != 0)
|
||||
{
|
||||
UINT_16 mask = pSwizzlePattern[i].s;
|
||||
UINT_32 sBits = s;
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
if (mask & 1)
|
||||
{
|
||||
v ^= sBits & 1;
|
||||
}
|
||||
|
||||
sBits >>= 1;
|
||||
mask >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
offset |= (v << i);
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
} // Addr
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -214,6 +214,73 @@ enum ShaderEngineTileSizeConfig
|
|||
ADDR_CONFIG_SE_TILE_32 = 0x00000001,
|
||||
};
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* @brief Bit setting for swizzle pattern
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
union ADDR_BIT_SETTING
|
||||
{
|
||||
struct
|
||||
{
|
||||
UINT_16 x;
|
||||
UINT_16 y;
|
||||
UINT_16 z;
|
||||
UINT_16 s;
|
||||
};
|
||||
UINT_64 value;
|
||||
};
|
||||
|
||||
/**
************************************************************************************************************************
*   InitBit
*
*   @brief
*       Initialize bit setting value via a return value. Produces a 64-bit value with exactly one bit set:
*       bit number (c * 16 + index), i.e. bit "index" inside the 16-bit field selected by channel "c"
*       (the constants that follow use c = 0 for X, 1 for Y, 2 for Z and 3 for S).
*
*   @note
*       Both arguments are parenthesized so that expressions such as "a & b" or "cond ? 1 : 0" expand
*       with the intended precedence.
************************************************************************************************************************
*/
#define InitBit(c, index) (1ull << (((c) << 4) + (index)))
|
||||
|
||||
const UINT_64 X0 = InitBit(0, 0);
|
||||
const UINT_64 X1 = InitBit(0, 1);
|
||||
const UINT_64 X2 = InitBit(0, 2);
|
||||
const UINT_64 X3 = InitBit(0, 3);
|
||||
const UINT_64 X4 = InitBit(0, 4);
|
||||
const UINT_64 X5 = InitBit(0, 5);
|
||||
const UINT_64 X6 = InitBit(0, 6);
|
||||
const UINT_64 X7 = InitBit(0, 7);
|
||||
const UINT_64 X8 = InitBit(0, 8);
|
||||
const UINT_64 X9 = InitBit(0, 9);
|
||||
const UINT_64 X10 = InitBit(0, 10);
|
||||
const UINT_64 X11 = InitBit(0, 11);
|
||||
|
||||
const UINT_64 Y0 = InitBit(1, 0);
|
||||
const UINT_64 Y1 = InitBit(1, 1);
|
||||
const UINT_64 Y2 = InitBit(1, 2);
|
||||
const UINT_64 Y3 = InitBit(1, 3);
|
||||
const UINT_64 Y4 = InitBit(1, 4);
|
||||
const UINT_64 Y5 = InitBit(1, 5);
|
||||
const UINT_64 Y6 = InitBit(1, 6);
|
||||
const UINT_64 Y7 = InitBit(1, 7);
|
||||
const UINT_64 Y8 = InitBit(1, 8);
|
||||
const UINT_64 Y9 = InitBit(1, 9);
|
||||
const UINT_64 Y10 = InitBit(1, 10);
|
||||
const UINT_64 Y11 = InitBit(1, 11);
|
||||
|
||||
const UINT_64 Z0 = InitBit(2, 0);
|
||||
const UINT_64 Z1 = InitBit(2, 1);
|
||||
const UINT_64 Z2 = InitBit(2, 2);
|
||||
const UINT_64 Z3 = InitBit(2, 3);
|
||||
const UINT_64 Z4 = InitBit(2, 4);
|
||||
const UINT_64 Z5 = InitBit(2, 5);
|
||||
const UINT_64 Z6 = InitBit(2, 6);
|
||||
const UINT_64 Z7 = InitBit(2, 7);
|
||||
const UINT_64 Z8 = InitBit(2, 8);
|
||||
|
||||
const UINT_64 S0 = InitBit(3, 0);
|
||||
const UINT_64 S1 = InitBit(3, 1);
|
||||
const UINT_64 S2 = InitBit(3, 2);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* @brief This class contains asic independent address lib functionalities
|
||||
|
|
@ -234,6 +301,9 @@ public:
|
|||
}
|
||||
|
||||
static Lib* GetLib(ADDR_HANDLE hLib);
|
||||
|
||||
/// Returns which version of addrlib functions should be used.
|
||||
virtual UINT_32 GetInterfaceVersion() const = 0;
|
||||
|
||||
/// Returns AddrLib version (from compiled binary instead include file)
|
||||
UINT_32 GetVersion()
|
||||
|
|
@ -263,6 +333,15 @@ public:
|
|||
|
||||
UINT_32 GetBpe(AddrFormat format) const;
|
||||
|
||||
|
||||
static UINT_32 ComputeOffsetFromSwizzlePattern(
|
||||
const UINT_64* pPattern,
|
||||
UINT_32 numBits,
|
||||
UINT_32 x,
|
||||
UINT_32 y,
|
||||
UINT_32 z,
|
||||
UINT_32 s);
|
||||
|
||||
protected:
|
||||
Lib(); // Constructor is protected
|
||||
Lib(const Client* pClient);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -70,6 +70,11 @@ public:
|
|||
static Lib* GetLib(
|
||||
ADDR_HANDLE hLib);
|
||||
|
||||
/// Reports which version of the addrlib interface this library class serves.
/// This implementation always answers 1.
virtual UINT_32 GetInterfaceVersion() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns tileIndex support
|
||||
BOOL_32 UseTileIndex(INT_32 index) const
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -382,6 +382,238 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
|
|||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::CopyLinearSurface
|
||||
*
|
||||
* @brief
|
||||
* Implements uncompressed linear copies between memory and images.
|
||||
*
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::CopyLinearSurface(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount,
|
||||
bool surfaceIsDst) const
|
||||
{
|
||||
ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
|
||||
ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
|
||||
ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (pIn->numSamples > 1)
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
|
||||
localIn.size = sizeof(localIn);
|
||||
localIn.flags = pIn->flags;
|
||||
localIn.swizzleMode = pIn->swizzleMode;
|
||||
localIn.resourceType = pIn->resourceType;
|
||||
localIn.format = pIn->format;
|
||||
localIn.bpp = pIn->bpp;
|
||||
localIn.width = Max(pIn->unAlignedDims.width, 1u);
|
||||
localIn.height = Max(pIn->unAlignedDims.height, 1u);
|
||||
localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
|
||||
localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
|
||||
localIn.numSamples = Max(pIn->numSamples, 1u);
|
||||
|
||||
if (localIn.numMipLevels <= 1)
|
||||
{
|
||||
localIn.pitchInElement = pIn->pitchInElement;
|
||||
}
|
||||
|
||||
localOut.size = sizeof(localOut);
|
||||
localOut.pMipInfo = mipInfo;
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
returnCode = ComputeSurfaceInfo(&localIn, &localOut);
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
|
||||
{
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
|
||||
|
||||
void* pMipBase = VoidPtrInc(pIn->pMappedSurface,
|
||||
(pIn->singleSubres ? 0 : mipInfo[pCurRegion->mipId].offset));
|
||||
|
||||
const size_t lineSizeBytes = (localIn.bpp >> 3) * pCurRegion->copyDims.width;
|
||||
const size_t lineImgPitchBytes = (localIn.bpp >> 3) * mipInfo[pCurRegion->mipId].pitch;
|
||||
|
||||
for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
|
||||
{
|
||||
UINT_32 sliceCoord = sliceIdx + pCurRegion->slice;
|
||||
size_t imgOffsetInMip = (localOut.sliceSize * sliceCoord) +
|
||||
(lineImgPitchBytes * pCurRegion->y) +
|
||||
(pCurRegion->x * (pIn->bpp >> 3));
|
||||
size_t memOffset = sliceIdx * pCurRegion->memSlicePitch;
|
||||
|
||||
for (UINT_32 yIdx = 0; yIdx < pCurRegion->copyDims.height; yIdx++)
|
||||
{
|
||||
if (surfaceIsDst)
|
||||
{
|
||||
memcpy(VoidPtrInc(pMipBase, imgOffsetInMip), VoidPtrInc(pCurRegion->pMem, memOffset), lineSizeBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pCurRegion->pMem, memOffset), VoidPtrInc(pMipBase, imgOffsetInMip), lineSizeBytes);
|
||||
}
|
||||
|
||||
imgOffsetInMip += lineImgPitchBytes;
|
||||
memOffset += pCurRegion->memRowPitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::CopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Interface function stub of Addr2CopyMemToSurface.
|
||||
*
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::CopyMemToSurface(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if ((regionCount == 0) || (pRegions == NULL))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (GetFillSizeFieldsFlags() == TRUE)
|
||||
{
|
||||
if (pIn->size != sizeof(ADDR2_COPY_MEMSURFACE_INPUT))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
UINT_32 baseMip = pRegions[0].mipId;
|
||||
BOOL_32 singleSubres = pIn->singleSubres;
|
||||
for (UINT_32 i = 0; i < regionCount; i++)
|
||||
{
|
||||
if (pRegions[i].size != sizeof(ADDR2_COPY_MEMSURFACE_REGION))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
if (singleSubres &&
|
||||
((pRegions[i].copyDims.depth != 1) ||
|
||||
(pRegions[i].slice != baseSlice) ||
|
||||
(pRegions[i].mipId != baseMip)))
|
||||
{
|
||||
// Copy will cover multiple/interleaved subresources, a
|
||||
// mapped pointer to a single subres cannot be valid.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
if (IsLinear(pIn->swizzleMode))
|
||||
{
|
||||
returnCode = CopyLinearSurface(pIn, pRegions, regionCount, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = HwlCopyMemToSurface(pIn, pRegions, regionCount);
|
||||
}
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::CopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Interface function stub of Addr2CopySurfaceToMem.
|
||||
*
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::CopySurfaceToMem(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (regionCount == 0)
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (GetFillSizeFieldsFlags() == TRUE)
|
||||
{
|
||||
if (pIn->size != sizeof(ADDR2_COPY_MEMSURFACE_INPUT))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
UINT_32 baseMip = pRegions[0].mipId;
|
||||
BOOL_32 singleSubres = pIn->singleSubres;
|
||||
for (UINT_32 i = 0; i < regionCount; i++)
|
||||
{
|
||||
if (pRegions[i].size != sizeof(ADDR2_COPY_MEMSURFACE_REGION))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
if (singleSubres &&
|
||||
((pRegions[i].copyDims.depth != 1) ||
|
||||
(pRegions[i].slice != baseSlice) ||
|
||||
(pRegions[i].mipId != baseMip)))
|
||||
{
|
||||
// Copy will cover multiple/interleaved subresources, a
|
||||
// mapped pointer to a single subres cannot be valid.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
if (IsLinear(pIn->swizzleMode))
|
||||
{
|
||||
returnCode = CopyLinearSurface(pIn, pRegions, regionCount, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = HwlCopySurfaceToMem(pIn, pRegions, regionCount);
|
||||
}
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::ComputeSurfaceCoordFromAddr
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -74,23 +74,6 @@ struct Dim3d
|
|||
UINT_32 d;
|
||||
};
|
||||
|
||||
// Macro define resource block type
|
||||
enum AddrBlockType
|
||||
{
|
||||
AddrBlockLinear = 0, // Resource uses linear swizzle mode
|
||||
AddrBlockMicro = 1, // Resource uses 256B block
|
||||
AddrBlockThin4KB = 2, // Resource uses thin 4KB block
|
||||
AddrBlockThick4KB = 3, // Resource uses thick 4KB block
|
||||
AddrBlockThin64KB = 4, // Resource uses thin 64KB block
|
||||
AddrBlockThick64KB = 5, // Resource uses thick 64KB block
|
||||
AddrBlockThinVar = 6, // Resource uses thin var block
|
||||
AddrBlockThickVar = 7, // Resource uses thick var block
|
||||
AddrBlockMaxTiledType,
|
||||
|
||||
AddrBlockThin256KB = AddrBlockThinVar,
|
||||
AddrBlockThick256KB = AddrBlockThickVar,
|
||||
};
|
||||
|
||||
enum AddrSwSet
|
||||
{
|
||||
AddrSwSetZ = 1 << ADDR_SW_Z,
|
||||
|
|
@ -109,23 +92,6 @@ const UINT_32 Log2Size256 = 8u;
|
|||
const UINT_32 Log2Size4K = 12u;
|
||||
const UINT_32 Log2Size64K = 16u;
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* @brief Bit setting for swizzle pattern
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
union ADDR_BIT_SETTING
|
||||
{
|
||||
struct
|
||||
{
|
||||
UINT_16 x;
|
||||
UINT_16 y;
|
||||
UINT_16 z;
|
||||
UINT_16 s;
|
||||
};
|
||||
UINT_64 value;
|
||||
};
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* @brief Swizzle pattern information
|
||||
|
|
@ -142,55 +108,6 @@ struct ADDR_SW_PATINFO
|
|||
UINT_8 nibble4Idx;
|
||||
};
|
||||
|
||||
/**
************************************************************************************************************************
*   InitBit
*
*   @brief
*       Initialize bit setting value via a return value. Produces a 64-bit value with exactly one bit set:
*       bit number (c * 16 + index), i.e. bit "index" inside the 16-bit field selected by channel "c"
*       (the constants that follow use c = 0 for X, 1 for Y, 2 for Z and 3 for S).
*
*   @note
*       Both arguments are parenthesized so that expressions such as "a & b" or "cond ? 1 : 0" expand
*       with the intended precedence.
************************************************************************************************************************
*/
#define InitBit(c, index) (1ull << (((c) << 4) + (index)))
|
||||
|
||||
const UINT_64 X0 = InitBit(0, 0);
|
||||
const UINT_64 X1 = InitBit(0, 1);
|
||||
const UINT_64 X2 = InitBit(0, 2);
|
||||
const UINT_64 X3 = InitBit(0, 3);
|
||||
const UINT_64 X4 = InitBit(0, 4);
|
||||
const UINT_64 X5 = InitBit(0, 5);
|
||||
const UINT_64 X6 = InitBit(0, 6);
|
||||
const UINT_64 X7 = InitBit(0, 7);
|
||||
const UINT_64 X8 = InitBit(0, 8);
|
||||
const UINT_64 X9 = InitBit(0, 9);
|
||||
const UINT_64 X10 = InitBit(0, 10);
|
||||
const UINT_64 X11 = InitBit(0, 11);
|
||||
|
||||
const UINT_64 Y0 = InitBit(1, 0);
|
||||
const UINT_64 Y1 = InitBit(1, 1);
|
||||
const UINT_64 Y2 = InitBit(1, 2);
|
||||
const UINT_64 Y3 = InitBit(1, 3);
|
||||
const UINT_64 Y4 = InitBit(1, 4);
|
||||
const UINT_64 Y5 = InitBit(1, 5);
|
||||
const UINT_64 Y6 = InitBit(1, 6);
|
||||
const UINT_64 Y7 = InitBit(1, 7);
|
||||
const UINT_64 Y8 = InitBit(1, 8);
|
||||
const UINT_64 Y9 = InitBit(1, 9);
|
||||
const UINT_64 Y10 = InitBit(1, 10);
|
||||
const UINT_64 Y11 = InitBit(1, 11);
|
||||
|
||||
const UINT_64 Z0 = InitBit(2, 0);
|
||||
const UINT_64 Z1 = InitBit(2, 1);
|
||||
const UINT_64 Z2 = InitBit(2, 2);
|
||||
const UINT_64 Z3 = InitBit(2, 3);
|
||||
const UINT_64 Z4 = InitBit(2, 4);
|
||||
const UINT_64 Z5 = InitBit(2, 5);
|
||||
const UINT_64 Z6 = InitBit(2, 6);
|
||||
const UINT_64 Z7 = InitBit(2, 7);
|
||||
const UINT_64 Z8 = InitBit(2, 8);
|
||||
|
||||
const UINT_64 S0 = InitBit(3, 0);
|
||||
const UINT_64 S1 = InitBit(3, 1);
|
||||
const UINT_64 S2 = InitBit(3, 2);
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
|
|
@ -205,6 +122,10 @@ public:
|
|||
static Lib* GetLib(
|
||||
ADDR_HANDLE hLib);
|
||||
|
||||
/// Reports which version of the addrlib interface this library class serves.
/// This implementation always answers 2.
virtual UINT_32 GetInterfaceVersion() const
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
//
|
||||
// Interface stubs
|
||||
//
|
||||
|
|
@ -221,6 +142,16 @@ public:
|
|||
ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
|
||||
const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
|
||||
|
||||
ADDR_E_RETURNCODE CopyMemToSurface(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
ADDR_E_RETURNCODE CopySurfaceToMem(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
// For HTile
|
||||
ADDR_E_RETURNCODE ComputeHtileInfo(
|
||||
|
|
@ -473,7 +404,7 @@ protected:
|
|||
sample = (sample == 0) ? 1 : sample;
|
||||
frag = (frag == 0) ? sample : frag;
|
||||
|
||||
UINT_32 fmaskBpp = QLog2(frag);
|
||||
UINT_32 fmaskBpp = Log2(frag);
|
||||
|
||||
if (sample > frag)
|
||||
{
|
||||
|
|
@ -725,6 +656,24 @@ protected:
|
|||
return ADDR_NOTIMPLEMENTED;
|
||||
}
|
||||
|
||||
/// Hardware-layer hook for memory-to-surface copies; takes the same arguments as CopyMemToSurface().
/// This default body only invokes ADDR_NOT_IMPLEMENTED() and reports ADDR_NOTSUPPORTED.
/// NOTE(review): presumably overridden by hardware layers that support the copy - confirm.
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_NOT_IMPLEMENTED();
|
||||
return ADDR_NOTSUPPORTED;
|
||||
}
|
||||
|
||||
/// Hardware-layer hook for surface-to-memory copies; takes the same arguments as CopySurfaceToMem().
/// This default body only invokes ADDR_NOT_IMPLEMENTED() and reports ADDR_NOTSUPPORTED.
/// NOTE(review): presumably overridden by hardware layers that support the copy - confirm.
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_NOT_IMPLEMENTED();
|
||||
return ADDR_NOTSUPPORTED;
|
||||
}
|
||||
|
||||
ADDR_E_RETURNCODE ComputeBlock256Equation(
|
||||
AddrResourceType rsrcType,
|
||||
AddrSwizzleMode swMode,
|
||||
|
|
@ -754,6 +703,12 @@ protected:
|
|||
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
|
||||
|
||||
ADDR_E_RETURNCODE CopyLinearSurface(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount,
|
||||
bool surfaceIsDst) const;
|
||||
|
||||
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear(
|
||||
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
|
||||
|
|
@ -892,13 +847,13 @@ protected:
|
|||
{
|
||||
case ADDR_RSRC_TEX_3D:
|
||||
// Fall through to share 2D case
|
||||
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1);
|
||||
actualMipLevels = Max(actualMipLevels, Log2(pIn->numSlices) + 1);
|
||||
case ADDR_RSRC_TEX_2D:
|
||||
// Fall through to share 1D case
|
||||
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1);
|
||||
actualMipLevels = Max(actualMipLevels, Log2(pIn->height) + 1);
|
||||
case ADDR_RSRC_TEX_1D:
|
||||
// Base 1D case
|
||||
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1);
|
||||
actualMipLevels = Max(actualMipLevels, Log2(pIn->width) + 1);
|
||||
break;
|
||||
default:
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
|
|
@ -976,8 +931,6 @@ protected:
|
|||
static const UINT_32 MaxSwModeType = 32;
|
||||
// Max number of resource type (2D/3D) supported for equation
|
||||
static const UINT_32 MaxRsrcType = 2;
|
||||
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
|
||||
static const UINT_32 MaxElementBytesLog2 = 5;
|
||||
// Almost all swizzle mode + resource type support equation
|
||||
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwModeType * MaxRsrcType;
|
||||
// Equation table
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -82,7 +82,7 @@ void Lib::Init()
|
|||
// There is no equation table entry for linear, so start at the "next" swizzle mode entry.
|
||||
for (UINT_32 swizzleModeIdx = ADDR3_LINEAR + 1; swizzleModeIdx < ADDR3_MAX_TYPE; swizzleModeIdx++)
|
||||
{
|
||||
for (UINT_32 msaaRateIdx = 0; msaaRateIdx < MaxMsaaRateLog2; msaaRateIdx++)
|
||||
for (UINT_32 msaaRateIdx = 0; msaaRateIdx < MaxNumMsaaRates; msaaRateIdx++)
|
||||
{
|
||||
for (UINT_32 log2BytesIdx = 0; log2BytesIdx < MaxElementBytesLog2; log2BytesIdx++)
|
||||
{
|
||||
|
|
@ -276,6 +276,11 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
|
|||
}
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
returnCode = ComputeSurfaceInfoSanityCheck(&localIn);
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
|
||||
|
|
@ -480,6 +485,242 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
|
|||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::CopyLinearSurface
|
||||
*
|
||||
* @brief
|
||||
* Implements uncompressed linear copies between memory and images.
|
||||
*
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::CopyLinearSurface(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount,
|
||||
bool surfaceIsDst) const
|
||||
{
|
||||
ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
|
||||
ADDR3_MIP_INFO mipInfo[Addr3MaxMipLevels] = {{0}};
|
||||
ADDR_ASSERT(pIn->numMipLevels <= Addr3MaxMipLevels);
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (pIn->numSamples > 1)
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
|
||||
localIn.size = sizeof(localIn);
|
||||
localIn.flags = pIn->flags;
|
||||
localIn.swizzleMode = ADDR3_LINEAR;
|
||||
localIn.resourceType = pIn->resourceType;
|
||||
localIn.format = pIn->format;
|
||||
localIn.bpp = pIn->bpp;
|
||||
localIn.width = Max(pIn->unAlignedDims.width, 1u);
|
||||
localIn.height = Max(pIn->unAlignedDims.height, 1u);
|
||||
localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
|
||||
localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
|
||||
localIn.numSamples = Max(pIn->numSamples, 1u);
|
||||
|
||||
if (localIn.numMipLevels <= 1)
|
||||
{
|
||||
localIn.pitchInElement = pIn->pitchInElement;
|
||||
}
|
||||
|
||||
localOut.size = sizeof(localOut);
|
||||
localOut.pMipInfo = mipInfo;
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
returnCode = ComputeSurfaceInfo(&localIn, &localOut);
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
|
||||
{
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
|
||||
|
||||
void* pMipBase = VoidPtrInc(pIn->pMappedSurface,
|
||||
(pIn->singleSubres ? 0 : mipInfo[pCurRegion->mipId].offset));
|
||||
|
||||
const size_t lineSizeBytes = (localIn.bpp >> 3) * pCurRegion->copyDims.width;
|
||||
const size_t lineImgPitchBytes = (localIn.bpp >> 3) * mipInfo[pCurRegion->mipId].pitch;
|
||||
|
||||
for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
|
||||
{
|
||||
UINT_32 sliceCoord = sliceIdx + pCurRegion->slice;
|
||||
size_t imgOffsetInMip = (localOut.sliceSize * sliceCoord) +
|
||||
(lineImgPitchBytes * pCurRegion->y) +
|
||||
(pCurRegion->x * (pIn->bpp >> 3));
|
||||
size_t memOffset = sliceIdx * pCurRegion->memSlicePitch;
|
||||
|
||||
for (UINT_32 yIdx = 0; yIdx < pCurRegion->copyDims.height; yIdx++)
|
||||
{
|
||||
if (surfaceIsDst)
|
||||
{
|
||||
memcpy(VoidPtrInc(pMipBase, imgOffsetInMip),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
lineSizeBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
VoidPtrInc(pMipBase, imgOffsetInMip),
|
||||
lineSizeBytes);
|
||||
}
|
||||
|
||||
imgOffsetInMip += lineImgPitchBytes;
|
||||
memOffset += pCurRegion->memRowPitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::CopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Interface function stub of Addr3CopyMemToSurface.
|
||||
*
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::CopyMemToSurface(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if ((regionCount == 0) || (pRegions == NULL))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (GetFillSizeFieldsFlags() == TRUE)
|
||||
{
|
||||
if (pIn->size != sizeof(ADDR3_COPY_MEMSURFACE_INPUT))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
UINT_32 baseMip = pRegions[0].mipId;
|
||||
BOOL_32 singleSubres = pIn->singleSubres;
|
||||
for (UINT_32 i = 0; i < regionCount; i++)
|
||||
{
|
||||
if (pRegions[i].size != sizeof(ADDR3_COPY_MEMSURFACE_REGION))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
if (singleSubres &&
|
||||
((pRegions[i].copyDims.depth != 1) ||
|
||||
(pRegions[i].slice != baseSlice) ||
|
||||
(pRegions[i].mipId != baseMip)))
|
||||
{
|
||||
// Copy will cover multiple/interleaved subresources, a
|
||||
// mapped pointer to a single subres cannot be valid.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
if (IsLinear(pIn->swizzleMode))
|
||||
{
|
||||
returnCode = CopyLinearSurface(pIn, pRegions, regionCount, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = HwlCopyMemToSurface(pIn, pRegions, regionCount);
|
||||
}
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::CopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Interface function stub of Addr3CopySurfaceToMem.
|
||||
*
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::CopySurfaceToMem(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (regionCount == 0)
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (GetFillSizeFieldsFlags() == TRUE)
|
||||
{
|
||||
if (pIn->size != sizeof(ADDR3_COPY_MEMSURFACE_INPUT))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
UINT_32 baseMip = pRegions[0].mipId;
|
||||
BOOL_32 singleSubres = pIn->singleSubres;
|
||||
for (UINT_32 i = 0; i < regionCount; i++)
|
||||
{
|
||||
if (pRegions[i].size != sizeof(ADDR3_COPY_MEMSURFACE_REGION))
|
||||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
if (singleSubres &&
|
||||
((pRegions[i].copyDims.depth != 1) ||
|
||||
(pRegions[i].slice != baseSlice) ||
|
||||
(pRegions[i].mipId != baseMip)))
|
||||
{
|
||||
// Copy will cover multiple/interleaved subresources, a
|
||||
// mapped pointer to a single subres cannot be valid.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
if (IsLinear(pIn->swizzleMode))
|
||||
{
|
||||
returnCode = CopyLinearSurface(pIn, pRegions, regionCount, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
returnCode = HwlCopySurfaceToMem(pIn, pRegions, regionCount);
|
||||
}
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::ComputeSurfaceAddrFromCoord
|
||||
|
|
@ -776,14 +1017,21 @@ ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight(
|
|||
|
||||
const UINT_32 elementBytes = pIn->bpp >> 3;
|
||||
|
||||
// Normal pitch of image data
|
||||
const UINT_32 pitchAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, TRUE);
|
||||
const UINT_32 pitchAlignmentElements = pitchAlignmentBytes / elementBytes;
|
||||
pOut->pitch = PowTwoAlign(pIn->width, pitchAlignmentElements);
|
||||
UINT_32 pitchAlignmentElements = pOut->blockExtent.width;
|
||||
UINT_32 pitchSliceAlignmentElements = pOut->blockExtent.width;
|
||||
|
||||
// Pitch of image data used for slice sizing (same except for linear images)
|
||||
const UINT_32 pitchSliceAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, CanTrimLinearPadding(pIn));
|
||||
const UINT_32 pitchSliceAlignmentElements = pitchSliceAlignmentBytes / elementBytes;
|
||||
if (IsLinear(pIn->swizzleMode))
|
||||
{
|
||||
// Normal pitch of image data
|
||||
const UINT_32 pitchAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, TRUE);
|
||||
pitchAlignmentElements = pitchAlignmentBytes / elementBytes;
|
||||
|
||||
// Pitch of image data used for slice sizing
|
||||
const UINT_32 pitchSliceAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, CanTrimLinearPadding(pIn));
|
||||
pitchSliceAlignmentElements = pitchSliceAlignmentBytes / elementBytes;
|
||||
}
|
||||
|
||||
pOut->pitch = PowTwoAlign(pIn->width, pitchAlignmentElements);
|
||||
pOut->pitchForSlice = PowTwoAlign(pIn->width, pitchSliceAlignmentElements);
|
||||
|
||||
UINT_32 heightAlign = pOut->blockExtent.height;
|
||||
|
|
@ -854,6 +1102,7 @@ ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight(
|
|||
return returnCode;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::ComputeQbStereoInfo
|
||||
|
|
@ -889,5 +1138,34 @@ VOID Lib::ComputeQbStereoInfo(
|
|||
pOut->sliceSize <<= 1;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::ComputeSurfaceInfoSanityCheck
|
||||
*
|
||||
* @brief
|
||||
* Internal function to do basic sanity check before compute surface info
|
||||
*
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck(
|
||||
const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
|
||||
) const
|
||||
{
|
||||
ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT localIn = {};
|
||||
localIn.size = sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT);
|
||||
localIn.flags = pIn->flags;
|
||||
localIn.resourceType = pIn->resourceType;
|
||||
localIn.bpp = pIn->bpp;
|
||||
localIn.width = pIn->width;
|
||||
localIn.height = pIn->height;
|
||||
localIn.numSlices = pIn->numSlices;
|
||||
localIn.numMipLevels = pIn->numMipLevels;
|
||||
localIn.numSamples = pIn->numSamples;
|
||||
|
||||
return HwlValidateNonSwModeParams(&localIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
|
||||
}
|
||||
|
||||
} // V3
|
||||
} // Addr
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -28,6 +28,7 @@ constexpr UINT_32 Size256 = 256u;
|
|||
constexpr UINT_32 Size4K = 4 * 1024;
|
||||
constexpr UINT_32 Size64K = 64 * 1024;
|
||||
constexpr UINT_32 Size256K = 256 * 1024;
|
||||
constexpr UINT_32 Addr3MaxMipLevels = 16; // Max Mip Levels across all addr3 chips
|
||||
|
||||
struct ADDR3_COORD
|
||||
{
|
||||
|
|
@ -46,23 +47,6 @@ struct ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT
|
|||
void* pvAddrParams;
|
||||
};
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* @brief Bit setting for swizzle pattern
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
union ADDR_BIT_SETTING
|
||||
{
|
||||
struct
|
||||
{
|
||||
UINT_16 x;
|
||||
UINT_16 y;
|
||||
UINT_16 z;
|
||||
UINT_16 s;
|
||||
};
|
||||
UINT_64 value;
|
||||
};
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* @brief Flags for SwizzleModeTable
|
||||
|
|
@ -108,53 +92,6 @@ struct ADDR_SW_PATINFO
|
|||
UINT_8 nibble4Idx;
|
||||
};
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* InitBit
|
||||
*
|
||||
* @brief
|
||||
* Initialize bit setting value via a return value
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
// Builds a one-hot 64-bit value: channel 'c' selects a 16-bit lane and 'index' the bit inside that lane.
// Both arguments are parenthesized so that expression arguments (e.g. "a | b", "i & 1") expand correctly.
#define InitBit(c, index) (1ull << (((c) << 4) + (index)))
|
||||
|
||||
const UINT_64 X0 = InitBit(0, 0);
|
||||
const UINT_64 X1 = InitBit(0, 1);
|
||||
const UINT_64 X2 = InitBit(0, 2);
|
||||
const UINT_64 X3 = InitBit(0, 3);
|
||||
const UINT_64 X4 = InitBit(0, 4);
|
||||
const UINT_64 X5 = InitBit(0, 5);
|
||||
const UINT_64 X6 = InitBit(0, 6);
|
||||
const UINT_64 X7 = InitBit(0, 7);
|
||||
const UINT_64 X8 = InitBit(0, 8);
|
||||
|
||||
const UINT_64 Y0 = InitBit(1, 0);
|
||||
const UINT_64 Y1 = InitBit(1, 1);
|
||||
const UINT_64 Y2 = InitBit(1, 2);
|
||||
const UINT_64 Y3 = InitBit(1, 3);
|
||||
const UINT_64 Y4 = InitBit(1, 4);
|
||||
const UINT_64 Y5 = InitBit(1, 5);
|
||||
const UINT_64 Y6 = InitBit(1, 6);
|
||||
const UINT_64 Y7 = InitBit(1, 7);
|
||||
const UINT_64 Y8 = InitBit(1, 8);
|
||||
|
||||
const UINT_64 Z0 = InitBit(2, 0);
|
||||
const UINT_64 Z1 = InitBit(2, 1);
|
||||
const UINT_64 Z2 = InitBit(2, 2);
|
||||
const UINT_64 Z3 = InitBit(2, 3);
|
||||
const UINT_64 Z4 = InitBit(2, 4);
|
||||
const UINT_64 Z5 = InitBit(2, 5);
|
||||
|
||||
const UINT_64 S0 = InitBit(3, 0);
|
||||
const UINT_64 S1 = InitBit(3, 1);
|
||||
const UINT_64 S2 = InitBit(3, 2);
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* @brief Bit setting for swizzle pattern
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* @brief This class contains asic independent address lib functionalities
|
||||
|
|
@ -168,6 +105,11 @@ public:
|
|||
static Lib* GetLib(
|
||||
ADDR_HANDLE hLib);
|
||||
|
||||
// Returns the interface version number of this library object; this implementation always returns 3.
// Declared virtual, so a derived class may report a different value.
virtual UINT_32 GetInterfaceVersion() const
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
|
||||
//
|
||||
// Interface stubs
|
||||
//
|
||||
|
|
@ -185,6 +127,16 @@ public:
|
|||
const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
|
||||
|
||||
ADDR_E_RETURNCODE CopyMemToSurface(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
ADDR_E_RETURNCODE CopySurfaceToMem(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
// Misc
|
||||
ADDR_E_RETURNCODE ComputePipeBankXor(
|
||||
const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
|
||||
|
|
@ -212,18 +164,16 @@ protected:
|
|||
SwizzleModeFlags m_swizzleModeTable[ADDR3_MAX_TYPE]; ///< Swizzle mode table
|
||||
|
||||
// Number of unique MSAA sample rates (1/2/4/8)
|
||||
static const UINT_32 MaxMsaaRateLog2 = 4;
|
||||
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
|
||||
static const UINT_32 MaxElementBytesLog2 = 5;
|
||||
static const UINT_32 MaxNumMsaaRates = 4;
|
||||
|
||||
// Number of equation entries in the table
|
||||
UINT_32 m_numEquations;
|
||||
|
||||
// Swizzle equation lookup table according to swizzle mode, MSAA sample rate and bpp. This does not include linear.
|
||||
UINT_32 m_equationLookupTable[ADDR3_MAX_TYPE - 1][MaxMsaaRateLog2][MaxElementBytesLog2];
|
||||
UINT_32 m_equationLookupTable[ADDR3_MAX_TYPE - 1][MaxNumMsaaRates][MaxElementBytesLog2];
|
||||
|
||||
// Block dimension lookup table according to swizzle mode, MSAA sample rate and bpp. This includes linear.
|
||||
ADDR_EXTENT3D m_blockDimensionTable[ADDR3_MAX_TYPE][MaxMsaaRateLog2][MaxElementBytesLog2];
|
||||
ADDR_EXTENT3D m_blockDimensionTable[ADDR3_MAX_TYPE][MaxNumMsaaRates][MaxElementBytesLog2];
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlComputeStereoInfo(
|
||||
const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
|
||||
|
|
@ -333,7 +283,7 @@ protected:
|
|||
|
||||
// The max alignment is tied to the swizzle mode and since the largest swizzle mode is 256kb, so the maximal
|
||||
// alignment is also 256kb.
|
||||
virtual UINT_32 HwlComputeMaxBaseAlignments() const { return Size256K; }
|
||||
virtual UINT_32 HwlComputeMaxBaseAlignments() const { return Size256K; }
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes(
|
||||
const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn,
|
||||
|
|
@ -357,6 +307,24 @@ protected:
|
|||
return ADDR_NOTSUPPORTED;
|
||||
}
|
||||
|
||||
// Default hardware-layer hook for copying client memory into a non-linear surface (Lib::CopyMemToSurface
// dispatches here when the swizzle mode is not linear). This base version performs no copy: it invokes
// ADDR_NOT_IMPLEMENTED() and returns ADDR_NOTSUPPORTED, so a hardware layer must override it to support the
// operation. All three parameters are ignored here.
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_NOT_IMPLEMENTED();
|
||||
return ADDR_NOTSUPPORTED;
|
||||
}
|
||||
|
||||
// Default hardware-layer hook for copying a non-linear surface out to client memory (Lib::CopySurfaceToMem
// dispatches here when the swizzle mode is not linear). This base version performs no copy: it invokes
// ADDR_NOT_IMPLEMENTED() and returns ADDR_NOTSUPPORTED, so a hardware layer must override it to support the
// operation. All three parameters are ignored here.
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const
|
||||
{
|
||||
ADDR_NOT_IMPLEMENTED();
|
||||
return ADDR_NOTSUPPORTED;
|
||||
}
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
|
||||
const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
|
||||
ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
|
||||
|
|
@ -373,6 +341,12 @@ protected:
|
|||
const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
|
||||
const ADDR_EXTENT3D& blockDims) const;
|
||||
|
||||
ADDR_E_RETURNCODE CopyLinearSurface(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount,
|
||||
bool surfaceIsDst) const;
|
||||
|
||||
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear(
|
||||
const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
|
||||
|
|
@ -457,6 +431,8 @@ protected:
|
|||
|
||||
virtual BOOL_32 HwlValidateNonSwModeParams(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn) const = 0;
|
||||
|
||||
ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck(const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
|
||||
|
||||
private:
|
||||
// Disallow the copy constructor
|
||||
Lib(const Lib& a);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
441
src/amd/addrlib/src/core/addrswizzler.cpp
Normal file
441
src/amd/addrlib/src/core/addrswizzler.cpp
Normal file
|
|
@ -0,0 +1,441 @@
|
|||
|
||||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* @file addrswizzler.cpp
|
||||
* @brief Contains code for efficient CPU swizzling.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
|
||||
#include "addrswizzler.h"
|
||||
|
||||
namespace Addr
|
||||
{
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::LutAddresser
|
||||
*
|
||||
* @brief
|
||||
* Constructor for the LutAddresser class.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
LutAddresser::LutAddresser()
|
||||
:
|
||||
m_pXLut(&m_lutData[0]),
|
||||
m_pYLut(&m_lutData[0]),
|
||||
m_pZLut(&m_lutData[0]),
|
||||
m_pSLut(&m_lutData[0]),
|
||||
m_xLutMask(0),
|
||||
m_yLutMask(0),
|
||||
m_zLutMask(0),
|
||||
m_sLutMask(0),
|
||||
m_blockBits(0),
|
||||
m_blockSize(),
|
||||
m_bpeLog2(0),
|
||||
m_bit(),
|
||||
m_lutData()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::Init
|
||||
*
|
||||
* @brief
|
||||
* Calculates general properties about the swizzle
|
||||
****************************************************************************************************
|
||||
*/
|
||||
void LutAddresser::Init(
|
||||
const ADDR_BIT_SETTING* pEq,
|
||||
UINT_32 eqSize,
|
||||
ADDR_EXTENT3D blockSize,
|
||||
UINT_8 blockBits)
|
||||
{
|
||||
ADDR_ASSERT(eqSize <= ADDR_MAX_EQUATION_BIT);
|
||||
memcpy(&m_bit[0], pEq, sizeof(ADDR_BIT_SETTING) * eqSize);
|
||||
m_blockSize = blockSize;
|
||||
m_blockBits = blockBits;
|
||||
|
||||
InitSwizzleProps();
|
||||
InitLuts();
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::InitSwizzleProps
|
||||
*
|
||||
* @brief
|
||||
* Calculates general properties about the swizzle
|
||||
****************************************************************************************************
|
||||
*/
|
||||
void LutAddresser::InitSwizzleProps()
|
||||
{
|
||||
// Calculate BPE from the swizzle. This can be derived from the number of invalid low bits.
|
||||
m_bpeLog2 = 0;
|
||||
for (UINT_32 i = 0; i < MaxElementBytesLog2; i++)
|
||||
{
|
||||
if (m_bit[i].value != 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
m_bpeLog2++;
|
||||
}
|
||||
|
||||
// Generate a mask/size for each channel's LUT. This may be larger than the block size.
|
||||
// If a given 'source' bit (eg. 'x0') is used for any part of the equation, fill that in the mask.
|
||||
for (UINT_32 i = 0; i < ADDR_MAX_EQUATION_BIT; i++)
|
||||
{
|
||||
m_xLutMask |= m_bit[i].x;
|
||||
m_yLutMask |= m_bit[i].y;
|
||||
m_zLutMask |= m_bit[i].z;
|
||||
m_sLutMask |= m_bit[i].s;
|
||||
}
|
||||
|
||||
// An expandX of 1 is a no-op
|
||||
m_maxExpandX = 1;
|
||||
if (m_sLutMask == 0)
|
||||
{
|
||||
// Calculate expandX from the swizzle. This can be derived from the number of consecutive,
|
||||
// increasing low x bits
|
||||
for (UINT_32 i = 0; i < 3; i++)
|
||||
{
|
||||
const auto& curBit = m_bit[m_bpeLog2 + i];
|
||||
ADDR_ASSERT(curBit.value != 0);
|
||||
if ((IsPow2(curBit.value) == false) || // More than one bit contributes
|
||||
(curBit.x == 0) || // Bit is from Y/Z/S channel
|
||||
(curBit.x != m_maxExpandX)) // X bits are out of order
|
||||
{
|
||||
break;
|
||||
}
|
||||
m_maxExpandX *= 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::InitLuts
|
||||
*
|
||||
* @brief
|
||||
* Creates lookup tables for each channel.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
void LutAddresser::InitLuts()
|
||||
{
|
||||
UINT_32 curOffset = 0;
|
||||
m_pXLut = &m_lutData[0];
|
||||
for (UINT_32 x = 0; x < (m_xLutMask + 1); x++)
|
||||
{
|
||||
m_pXLut[x] = EvalEquation(x, 0, 0, 0);
|
||||
}
|
||||
curOffset += m_xLutMask + 1;
|
||||
ADDR_ASSERT(curOffset <= MaxLutSize);
|
||||
|
||||
if (m_yLutMask != 0)
|
||||
{
|
||||
m_pYLut = &m_lutData[curOffset];
|
||||
for (UINT_32 y = 0; y < (m_yLutMask + 1); y++)
|
||||
{
|
||||
m_pYLut[y] = EvalEquation(0, y, 0, 0);
|
||||
}
|
||||
curOffset += m_yLutMask + 1;
|
||||
ADDR_ASSERT(curOffset <= MaxLutSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_pYLut = &m_lutData[0];
|
||||
ADDR_ASSERT(m_pYLut[0] == 0);
|
||||
}
|
||||
|
||||
if (m_zLutMask != 0)
|
||||
{
|
||||
m_pZLut = &m_lutData[curOffset];
|
||||
for (UINT_32 z = 0; z < (m_zLutMask + 1); z++)
|
||||
{
|
||||
m_pZLut[z] = EvalEquation(0, 0, z, 0);
|
||||
}
|
||||
curOffset += m_zLutMask + 1;
|
||||
ADDR_ASSERT(curOffset <= MaxLutSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_pZLut = &m_lutData[0];
|
||||
ADDR_ASSERT(m_pZLut[0] == 0);
|
||||
}
|
||||
|
||||
if (m_sLutMask != 0)
|
||||
{
|
||||
m_pSLut = &m_lutData[curOffset];
|
||||
for (UINT_32 s = 0; s < (m_sLutMask + 1); s++)
|
||||
{
|
||||
m_pSLut[s] = EvalEquation(0, 0, 0, s);
|
||||
}
|
||||
curOffset += m_sLutMask + 1;
|
||||
ADDR_ASSERT(curOffset <= MaxLutSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_pSLut = &m_lutData[0];
|
||||
ADDR_ASSERT(m_pSLut[0] == 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::EvalEquation
|
||||
*
|
||||
* @brief
|
||||
* Evaluates the equation at a given coordinate manually.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
UINT_32 LutAddresser::EvalEquation(
|
||||
UINT_32 x,
|
||||
UINT_32 y,
|
||||
UINT_32 z,
|
||||
UINT_32 s)
|
||||
{
|
||||
UINT_32 out = 0;
|
||||
|
||||
for (UINT_32 i = 0; i < ADDR_MAX_EQUATION_BIT; i++)
|
||||
{
|
||||
if (m_bit[i].value == 0)
|
||||
{
|
||||
if (out != 0)
|
||||
{
|
||||
// Invalid bits at the top of the equation
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (x != 0)
|
||||
{
|
||||
UINT_32 xSrcs = m_bit[i].x;
|
||||
while (xSrcs != 0)
|
||||
{
|
||||
UINT_32 xIdx = BitScanForward(xSrcs);
|
||||
out ^= (((x >> xIdx) & 1) << i);
|
||||
xSrcs = UnsetLeastBit(xSrcs);
|
||||
}
|
||||
}
|
||||
|
||||
if (y != 0)
|
||||
{
|
||||
UINT_32 ySrcs = m_bit[i].y;
|
||||
while (ySrcs != 0)
|
||||
{
|
||||
UINT_32 yIdx = BitScanForward(ySrcs);
|
||||
out ^= (((y >> yIdx) & 1) << i);
|
||||
ySrcs = UnsetLeastBit(ySrcs);
|
||||
}
|
||||
}
|
||||
|
||||
if (z != 0)
|
||||
{
|
||||
UINT_32 zSrcs = m_bit[i].z;
|
||||
while (zSrcs != 0)
|
||||
{
|
||||
UINT_32 zIdx = BitScanForward(zSrcs);
|
||||
out ^= (((z >> zIdx) & 1) << i);
|
||||
zSrcs = UnsetLeastBit(zSrcs);
|
||||
}
|
||||
}
|
||||
|
||||
if (s != 0)
|
||||
{
|
||||
UINT_32 sSrcs = m_bit[i].s;
|
||||
while (sSrcs != 0)
|
||||
{
|
||||
UINT_32 sIdx = BitScanForward(sSrcs);
|
||||
out ^= (((s >> sIdx) & 1) << i);
|
||||
sSrcs = UnsetLeastBit(sSrcs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Copy2DSliceUnaligned
|
||||
*
|
||||
* @brief
|
||||
* Copies an arbitrary 2D pixel region to or from a surface.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <int BPELog2, int ExpandX, bool ImgIsDest>
|
||||
void Copy2DSliceUnaligned(
|
||||
void* pImgBlockSliceStart, // Block corresponding to beginning of slice
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
ADDR_COORD2D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT2D extent, // Size to copy, in elements
|
||||
UINT_32 sliceXor, // Includes pipeBankXor and z XOR
|
||||
const LutAddresser& addresser)
|
||||
{
|
||||
UINT_32 xStart = origin.x;
|
||||
UINT_32 xEnd = origin.x + extent.width;
|
||||
|
||||
constexpr UINT_32 PixBytes = (1 << BPELog2);
|
||||
|
||||
// Apply a negative offset now so later code can do eg. pBuf[x] instead of pBuf[x - origin.x]
|
||||
pBuf = VoidPtrDec(pBuf, xStart * PixBytes);
|
||||
|
||||
// Do things one row at a time for unaligned regions.
|
||||
for (UINT_32 y = origin.y; y < (origin.y + extent.height); y++)
|
||||
{
|
||||
UINT_32 yBlk = (y >> addresser.GetBlockYBits()) * imageBlocksY;
|
||||
UINT_32 rowXor = sliceXor ^ addresser.GetAddressY(y);
|
||||
|
||||
UINT_32 x = xStart;
|
||||
|
||||
// Most swizzles pack 2-4 pixels horizontally. Take advantage of this even in non-microblock-aligned
|
||||
// regions to commonly do 2-4x less work. This is still way less good than copying by whole microblocks though.
|
||||
if (ExpandX > 1)
|
||||
{
|
||||
// Unaligned left edge
|
||||
for (; x < Min(xEnd, PowTwoAlign(xStart, ExpandX)); x++)
|
||||
{
|
||||
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
|
||||
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes);
|
||||
}
|
||||
}
|
||||
// Aligned middle
|
||||
for (; x < PowTwoAlignDown(xEnd, ExpandX); x += ExpandX)
|
||||
{
|
||||
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
|
||||
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes * ExpandX);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes * ExpandX);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Unaligned end (or the whole thing when ExpandX == 1)
|
||||
for (; x < xEnd; x++)
|
||||
{
|
||||
// Get the index of the block within the slice
|
||||
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
|
||||
// Apply that index to get the base address of the current block.
|
||||
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
|
||||
// Grab the x-xor and XOR it all together, adding to get the final address
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes);
|
||||
}
|
||||
}
|
||||
|
||||
pBuf = VoidPtrInc(pBuf, bufStrideY);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::GetCopyMemImgFunc
|
||||
*
|
||||
* @brief
|
||||
* Determines and returns which copy function to use for copying to images
|
||||
****************************************************************************************************
|
||||
*/
|
||||
UnalignedCopyMemImgFunc LutAddresser::GetCopyMemImgFunc() const
|
||||
{
|
||||
// While these are all the same function, the codegen gets really bad if the size of each pixel
|
||||
// is not known at compile time. Hence, templates.
|
||||
const UnalignedCopyMemImgFunc Funcs[MaxElementBytesLog2][3] =
|
||||
{
|
||||
// ExpandX = 1, 2, 4
|
||||
{ Copy2DSliceUnaligned<0, 1, true>, Copy2DSliceUnaligned<0, 2, true>, Copy2DSliceUnaligned<0, 4, true> }, // 1BPE
|
||||
{ Copy2DSliceUnaligned<1, 1, true>, Copy2DSliceUnaligned<1, 2, true>, Copy2DSliceUnaligned<1, 4, true> }, // 2BPE
|
||||
{ Copy2DSliceUnaligned<2, 1, true>, Copy2DSliceUnaligned<2, 2, true>, Copy2DSliceUnaligned<2, 4, true> }, // 4BPE
|
||||
{ Copy2DSliceUnaligned<3, 1, true>, Copy2DSliceUnaligned<3, 2, true>, Copy2DSliceUnaligned<3, 4, true> }, // 8BPE
|
||||
{ Copy2DSliceUnaligned<4, 1, true>, Copy2DSliceUnaligned<4, 2, true>, Copy2DSliceUnaligned<4, 4, true> }, // 16BPE
|
||||
};
|
||||
|
||||
UnalignedCopyMemImgFunc pfnRet = nullptr;
|
||||
ADDR_ASSERT(m_bpeLog2 < MaxElementBytesLog2);
|
||||
if (m_maxExpandX >= 4)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][2];
|
||||
}
|
||||
else if (m_maxExpandX >= 2)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][1];
|
||||
}
|
||||
else
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][0];
|
||||
}
|
||||
return pfnRet;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::GetCopyImgMemFunc
|
||||
*
|
||||
* @brief
|
||||
* Determines and returns which copy function to use for copying from images
|
||||
****************************************************************************************************
|
||||
*/
|
||||
UnalignedCopyMemImgFunc LutAddresser::GetCopyImgMemFunc() const
|
||||
{
|
||||
// While these are all the same function, the codegen gets really bad if the size of each pixel
|
||||
// is not known at compile time. Hence, templates.
|
||||
const UnalignedCopyMemImgFunc Funcs[MaxElementBytesLog2][3] =
|
||||
{
|
||||
// ExpandX = 1, 2, 4
|
||||
{ Copy2DSliceUnaligned<0, 1, false>, Copy2DSliceUnaligned<0, 2, false>, Copy2DSliceUnaligned<0, 4, false> }, // 1BPE
|
||||
{ Copy2DSliceUnaligned<1, 1, false>, Copy2DSliceUnaligned<1, 2, false>, Copy2DSliceUnaligned<1, 4, false> }, // 2BPE
|
||||
{ Copy2DSliceUnaligned<2, 1, false>, Copy2DSliceUnaligned<2, 2, false>, Copy2DSliceUnaligned<2, 4, false> }, // 4BPE
|
||||
{ Copy2DSliceUnaligned<3, 1, false>, Copy2DSliceUnaligned<3, 2, false>, Copy2DSliceUnaligned<3, 4, false> }, // 8BPE
|
||||
{ Copy2DSliceUnaligned<4, 1, false>, Copy2DSliceUnaligned<4, 2, false>, Copy2DSliceUnaligned<4, 4, false> }, // 16BPE
|
||||
};
|
||||
|
||||
UnalignedCopyMemImgFunc pfnRet = nullptr;
|
||||
ADDR_ASSERT(m_bpeLog2 < MaxElementBytesLog2);
|
||||
if (m_maxExpandX >= 4)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][2];
|
||||
}
|
||||
else if (m_maxExpandX >= 2)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][1];
|
||||
}
|
||||
else
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][0];
|
||||
}
|
||||
return pfnRet;
|
||||
}
|
||||
|
||||
}
|
||||
119
src/amd/addrlib/src/core/addrswizzler.h
Normal file
119
src/amd/addrlib/src/core/addrswizzler.h
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* @file addrswizzler.h
|
||||
* @brief Contains code for efficient CPU swizzling.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
#ifndef __ADDR_SWIZZLER_H__
|
||||
#define __ADDR_SWIZZLER_H__
|
||||
|
||||
#include "addrlib.h"
|
||||
#include "addrcommon.h"
|
||||
|
||||
namespace Addr
|
||||
{
|
||||
|
||||
// Forward decl
|
||||
class LutAddresser;
|
||||
|
||||
typedef void (*UnalignedCopyMemImgFunc)(
|
||||
void* pImgBlockSliceStart, // Block corresponding to beginning of slice
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
ADDR_COORD2D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT2D extent, // Size to copy, in elements
|
||||
UINT_32 sliceXor, // Includes pipeBankXor and z XOR
|
||||
const LutAddresser& addresser);
|
||||
|
||||
// This class calculates and holds up to four lookup tables (x/y/z/s) which can be used to cheaply calculate the
|
||||
// position of a pixel within a block at the cost of some precomputation and memory usage.
|
||||
//
|
||||
// This works for all equations and does something like this:
|
||||
//    offset = blockAddr ^ XLut[x & xMask] ^ YLut[y & yMask]...
|
||||
class LutAddresser
|
||||
{
|
||||
public:
|
||||
constexpr static UINT_32 MaxLutSize = 2100; // Sized to fit the largest non-VAR LUT size
|
||||
|
||||
LutAddresser();
|
||||
|
||||
void Init(const ADDR_BIT_SETTING* pEq, UINT_32 eqSize, ADDR_EXTENT3D blockSize, UINT_8 blkBits);
|
||||
|
||||
// Does a full calculation to get the offset within a block. Takes an *absolute* coordinate,
|
||||
// not the coordinate within the block.
|
||||
UINT_32 GetBlockOffset(
|
||||
UINT_32 x,
|
||||
UINT_32 y,
|
||||
UINT_32 z,
|
||||
UINT_32 s = 0,
|
||||
UINT_32 pipeBankXor = 0)
|
||||
{
|
||||
return GetAddressX(x) ^ GetAddressY(y) ^ GetAddressZ(z) ^ GetAddressS(s) ^ pipeBankXor;
|
||||
}
|
||||
|
||||
// Get the block size
|
||||
UINT_32 GetBlockBits() const { return m_blockBits; }
|
||||
UINT_32 GetBlockXBits() const { return Log2(m_blockSize.width); }
|
||||
UINT_32 GetBlockYBits() const { return Log2(m_blockSize.height); }
|
||||
UINT_32 GetBlockZBits() const { return Log2(m_blockSize.depth); }
|
||||
|
||||
// "Fast single channel" functions to get the part that each channel contributes to be XORd together.
|
||||
UINT_32 GetAddressX(UINT_32 x) const { return m_pXLut[x & m_xLutMask];}
|
||||
UINT_32 GetAddressY(UINT_32 y) const { return m_pYLut[y & m_yLutMask];}
|
||||
UINT_32 GetAddressZ(UINT_32 z) const { return m_pZLut[z & m_zLutMask];}
|
||||
UINT_32 GetAddressS(UINT_32 s) const { return m_pSLut[s & m_sLutMask];}
|
||||
|
||||
// Get a function that can copy a single 2D slice of an image with this swizzle.
|
||||
UnalignedCopyMemImgFunc GetCopyMemImgFunc() const;
|
||||
UnalignedCopyMemImgFunc GetCopyImgMemFunc() const;
|
||||
private:
|
||||
// Calculate general properties of the swizzle equations
|
||||
void InitSwizzleProps();
|
||||
// Fills a LUT for each channel.
|
||||
void InitLuts();
|
||||
// Evaluate coordinate without LUTs
|
||||
UINT_32 EvalEquation(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s);
|
||||
|
||||
// Pointers within m_lutData corresponding to where each LUT starts
|
||||
// m_lutData[0] always has a value of 0 and thus can be considered an empty 1-entry LUT for "don't care" channels
|
||||
UINT_32* m_pXLut;
|
||||
UINT_32* m_pYLut;
|
||||
UINT_32* m_pZLut;
|
||||
UINT_32* m_pSLut;
|
||||
|
||||
// Size of each LUT, minus 1 to form a mask. A mask of 0 is valid for an empty LUT.
|
||||
UINT_32 m_xLutMask;
|
||||
UINT_32 m_yLutMask;
|
||||
UINT_32 m_zLutMask;
|
||||
UINT_32 m_sLutMask;
|
||||
|
||||
// Number of bits in the block (aka Log2(blkSize))
|
||||
UINT_32 m_blockBits;
|
||||
|
||||
// The block size
|
||||
ADDR_EXTENT3D m_blockSize;
|
||||
|
||||
// Number of 'x' bits at the bottom of the equation. Must be a pow2 and at least 1.
|
||||
// This will be used as a simple optimization to batch together operations on adjacent x pixels.
|
||||
UINT_32 m_maxExpandX;
|
||||
|
||||
// BPE for this equation.
|
||||
UINT_32 m_bpeLog2;
|
||||
|
||||
// The full equation
|
||||
ADDR_BIT_SETTING m_bit[ADDR_MAX_EQUATION_BIT];
|
||||
|
||||
// Backing store for the LUT tables.
|
||||
UINT_32 m_lutData[MaxLutSize];
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // __ADDR_SWIZZLER_H__
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -3721,6 +3721,7 @@ const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO[] =
|
|||
{ 3, 27, 344, 365, 124, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
|
||||
};
|
||||
|
||||
|
||||
const UINT_64 GFX10_SW_PATTERN_NIBBLE01[][8] =
|
||||
{
|
||||
{X0, X1, X2, X3, Y0, Y1, Y2, Y3, }, // 0
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -41,8 +41,7 @@ struct Gfx10ChipSettings
|
|||
UINT_32 supportRbPlus : 1;
|
||||
UINT_32 dsMipmapHtileFix : 1;
|
||||
UINT_32 dccUnsup3DSwDis : 1;
|
||||
UINT_32 : 4;
|
||||
UINT_32 reserved2 : 24;
|
||||
UINT_32 reserved2 : 28;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
@ -142,7 +141,6 @@ const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10Displa
|
|||
const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
|
||||
(1u << ADDR_SW_64KB_R_X);
|
||||
|
||||
|
||||
const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask |
|
||||
Gfx10BlkVarSwModeMask;
|
||||
|
||||
|
|
@ -155,8 +153,7 @@ const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10
|
|||
const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask;
|
||||
|
||||
const UINT_32 Gfx10MsaaSwModeMask = (Gfx10ZSwModeMask |
|
||||
Gfx10RenderSwModeMask)
|
||||
;
|
||||
Gfx10RenderSwModeMask);
|
||||
|
||||
const UINT_32 Dcn20NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) |
|
||||
(1u << ADDR_SW_4KB_S) |
|
||||
|
|
@ -299,6 +296,10 @@ protected:
|
|||
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
|
||||
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes(
|
||||
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
|
||||
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
|
||||
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
|
||||
|
||||
|
|
@ -313,6 +314,16 @@ protected:
|
|||
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
|
||||
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
|
||||
|
||||
|
|
@ -342,14 +353,6 @@ private:
|
|||
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
|
||||
|
||||
UINT_32 ComputeOffsetFromSwizzlePattern(
|
||||
const UINT_64* pPattern,
|
||||
UINT_32 numBits,
|
||||
UINT_32 x,
|
||||
UINT_32 y,
|
||||
UINT_32 z,
|
||||
UINT_32 s) const;
|
||||
|
||||
UINT_32 ComputeOffsetFromEquation(
|
||||
const ADDR_EQUATION* pEq,
|
||||
UINT_32 x,
|
||||
|
|
@ -393,7 +396,7 @@ private:
|
|||
*/
|
||||
VOID GetSwizzlePatternFromPatternInfo(
|
||||
const ADDR_SW_PATINFO* pPatInfo,
|
||||
ADDR_BIT_SETTING (&pSwizzle)[20]) const
|
||||
ADDR_BIT_SETTING (&pSwizzle)[ADDR_MAX_EQUATION_BIT]) const
|
||||
{
|
||||
memcpy(pSwizzle,
|
||||
GFX10_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include "gfx11addrlib.h"
|
||||
#include "gfx11_gb_reg.h"
|
||||
#include "addrswizzler.h"
|
||||
|
||||
#include "amdgpu_asic_addr.h"
|
||||
|
||||
|
|
@ -1874,7 +1875,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
|
|||
|
||||
if (pPatInfo != NULL)
|
||||
{
|
||||
ADDR_BIT_SETTING fullSwizzlePattern[20];
|
||||
ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
|
||||
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
|
||||
|
||||
const UINT_32 pipeBankXorOffset =
|
||||
|
|
@ -2751,7 +2752,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
|
|||
}
|
||||
|
||||
// Select the biggest allowed block type
|
||||
minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
|
||||
minSizeBlk = Log2(allowedBlockSet.value) + 1;
|
||||
|
||||
if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
|
||||
{
|
||||
|
|
@ -2897,7 +2898,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
|
|||
// Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
|
||||
// swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
|
||||
// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
|
||||
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
|
||||
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -3690,6 +3691,245 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx11Lib::HwlCopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Gfx11Lib::HwlCopyMemToSurface(
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount
    ) const
{
    // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
    // optimized for a particular micro-swizzle mode if available.
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->numSamples > 1)
    {
        // TODO: MSAA
        returnCode = ADDR_NOTIMPLEMENTED;
    }
    if (IsBlockVariable(pIn->swizzleMode))
    {
        // TODO: larger LUTs for worst-case 256KB swizzle.
        returnCode = ADDR_NOTIMPLEMENTED;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    LutAddresser            addresser        = LutAddresser();
    UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;

    // Only build the lookup tables once the surface info is known to be valid. On any earlier failure localOut
    // is still zeroed, and initialising the addresser from a zero-sized block (or from a missing pattern) is
    // meaningless. This mirrors the guard used by the Gfx12 implementation.
    if (returnCode == ADDR_OK)
    {
        const UINT_32          blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_SW_PATINFO* pPatInfo    = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                   pIn->resourceType,
                                                                   Log2(pIn->bpp >> 3),
                                                                   pIn->numSamples);

        if (pPatInfo == NULL)
        {
            // No swizzle pattern exists for this combination; GetSwizzlePatternFromPatternInfo would
            // dereference the null pointer.
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
            GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

            ADDR_EXTENT3D blockExtent = {
                localOut.blockWidth,
                localOut.blockHeight,
                localOut.blockSlices
            };

            addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
            pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
            if (pfnCopyUnaligned == nullptr)
            {
                ADDR_ASSERT_ALWAYS();
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];

            // mipId indexes the local mipInfo[] array; reject anything outside the computed mip chain instead
            // of reading past it. Regions preceding the bad one have already been copied at this point.
            if (pCurRegion->mipId >= localIn.numMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }

            const ADDR2_MIP_INFO* pMipInfo  = &mipInfo[pCurRegion->mipId];
            const UINT_64         mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
            const UINT_32         yBlks     = pMipInfo->pitch / localOut.blockWidth;

            // Region coordinates are relative to the mip; add the mip's position inside its mip-tail block.
            const UINT_32 xStart     = pCurRegion->x     + pMipInfo->mipTailCoordX;
            const UINT_32 yStart     = pCurRegion->y     + pMipInfo->mipTailCoordY;
            const UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;

            for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
            {
                const UINT_32 slice = sliceStart + sliceIdx;

                // The copy functions take the base address of the hardware slice, not the logical slice. Those are
                // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
                // for unaligned copies.
                const UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
                const UINT_32 sliceXor      = pIn->pbXor ^ addresser.GetAddressZ(slice);

                // The linear buffer is indexed by the slice's position within the region. It must not include
                // mipTailCoordZ, which only applies to the image-side coordinate.
                const UINT_64 memOffset = static_cast<UINT_64>(sliceIdx) * pCurRegion->memSlicePitch;
                const UINT_64 imgOffset = mipOffset + (static_cast<UINT_64>(sliceBlkStart) * localOut.sliceSize);

                ADDR_COORD2D  sliceOrigin = { xStart, yStart };
                ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };

                pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
                                 VoidPtrInc(pCurRegion->pMem, memOffset),
                                 pCurRegion->memRowPitch,
                                 yBlks,
                                 sliceOrigin,
                                 sliceExtent,
                                 sliceXor,
                                 addresser);
            }
        }
    }
    return returnCode;
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx11Lib::HwlCopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Gfx11Lib::HwlCopySurfaceToMem(
    const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount
    ) const
{
    // Copy tiled surface to memory. We will use the 'swizzler' object to dispatch to a version of the copy routine
    // optimized for a particular micro-swizzle mode if available.
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->numSamples > 1)
    {
        // TODO: MSAA
        returnCode = ADDR_NOTIMPLEMENTED;
    }
    if (IsBlockVariable(pIn->swizzleMode))
    {
        // TODO: larger LUTs for worst-case 256KB swizzle.
        returnCode = ADDR_NOTIMPLEMENTED;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    LutAddresser            addresser        = LutAddresser();
    UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;

    // Only build the lookup tables once the surface info is known to be valid. On any earlier failure localOut
    // is still zeroed, and initialising the addresser from a zero-sized block (or from a missing pattern) is
    // meaningless. This mirrors the guard used by the Gfx12 implementation.
    if (returnCode == ADDR_OK)
    {
        const UINT_32          blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_SW_PATINFO* pPatInfo    = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                   pIn->resourceType,
                                                                   Log2(pIn->bpp >> 3),
                                                                   pIn->numSamples);

        if (pPatInfo == NULL)
        {
            // No swizzle pattern exists for this combination; GetSwizzlePatternFromPatternInfo would
            // dereference the null pointer.
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
            GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

            ADDR_EXTENT3D blockExtent = {
                localOut.blockWidth,
                localOut.blockHeight,
                localOut.blockSlices
            };

            addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
            pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
            if (pfnCopyUnaligned == nullptr)
            {
                ADDR_ASSERT_ALWAYS();
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];

            // mipId indexes the local mipInfo[] array; reject anything outside the computed mip chain instead
            // of reading past it. Regions preceding the bad one have already been copied at this point.
            if (pCurRegion->mipId >= localIn.numMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }

            const ADDR2_MIP_INFO* pMipInfo  = &mipInfo[pCurRegion->mipId];
            const UINT_64         mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
            const UINT_32         yBlks     = pMipInfo->pitch / localOut.blockWidth;

            // Region coordinates are relative to the mip; add the mip's position inside its mip-tail block.
            const UINT_32 xStart     = pCurRegion->x     + pMipInfo->mipTailCoordX;
            const UINT_32 yStart     = pCurRegion->y     + pMipInfo->mipTailCoordY;
            const UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;

            for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
            {
                const UINT_32 slice = sliceStart + sliceIdx;

                // The copy functions take the base address of the hardware slice, not the logical slice. Those are
                // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
                // for unaligned copies.
                const UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
                const UINT_32 sliceXor      = pIn->pbXor ^ addresser.GetAddressZ(slice);

                // The linear buffer is indexed by the slice's position within the region. It must not include
                // mipTailCoordZ, which only applies to the image-side coordinate.
                const UINT_64 memOffset = static_cast<UINT_64>(sliceIdx) * pCurRegion->memSlicePitch;
                const UINT_64 imgOffset = mipOffset + (static_cast<UINT_64>(sliceBlkStart) * localOut.sliceSize);

                ADDR_COORD2D  sliceOrigin = { xStart, yStart };
                ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };

                pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
                                 VoidPtrInc(pCurRegion->pMem, memOffset),
                                 pCurRegion->memRowPitch,
                                 yBlks,
                                 sliceOrigin,
                                 sliceExtent,
                                 sliceXor,
                                 addresser);
            }
        }
    }
    return returnCode;
}
|
||||
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx11Lib::ComputeOffsetFromEquation
|
||||
|
|
@ -3740,107 +3980,6 @@ UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
|
|||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx11Lib::ComputeOffsetFromSwizzlePattern
|
||||
*
|
||||
* @brief
|
||||
* Compute offset from swizzle pattern
|
||||
*
|
||||
* @return
|
||||
* Offset
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
UINT_32 Gfx11Lib::ComputeOffsetFromSwizzlePattern(
    const UINT_64* pPattern,    ///< Swizzle pattern
    UINT_32        numBits,     ///< Number of bits in pattern
    UINT_32        x,           ///< x coord in pixel
    UINT_32        y,           ///< y coord in pixel
    UINT_32        z,           ///< z coord in slice
    UINT_32        s            ///< sample id
    ) const
{
    // Each 64-bit pattern entry is read as one ADDR_BIT_SETTING holding a selection mask per channel.
    const ADDR_BIT_SETTING* const pBitSettings = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);

    // XOR of the coordinate bits selected by a channel mask. Walks the mask from bit 0 upwards and stops as soon
    // as no set mask bits remain.
    const auto maskedParity = [](UINT_16 channelMask, UINT_32 coord) -> UINT_32
    {
        UINT_32 parity = 0;

        for (; channelMask != 0; channelMask >>= 1, coord >>= 1)
        {
            if ((channelMask & 1) != 0)
            {
                parity ^= (coord & 1);
            }
        }

        return parity;
    };

    UINT_32 result = 0;

    for (UINT_32 bit = 0; bit < numBits; bit++)
    {
        const ADDR_BIT_SETTING& setting = pBitSettings[bit];

        // Output bit 'bit' is the XOR of every coordinate bit the four channel masks select.
        const UINT_32 bitValue = maskedParity(setting.x, x) ^
                                 maskedParity(setting.y, y) ^
                                 maskedParity(setting.z, z) ^
                                 maskedParity(setting.s, s);

        result |= (bitValue << bit);
    }

    return result;
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx11Lib::GetSwizzlePatternInfo
|
||||
|
|
@ -4200,7 +4339,7 @@ ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
|
|||
const UINT_32 xb = pIn->x / localOut.blockWidth;
|
||||
const UINT_64 blkIdx = yb * pb + xb;
|
||||
|
||||
ADDR_BIT_SETTING fullSwizzlePattern[20];
|
||||
ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
|
||||
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
|
||||
|
||||
const UINT_32 blkOffset =
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -300,6 +300,16 @@ protected:
|
|||
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
|
||||
const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const;
|
||||
|
||||
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
|
||||
|
||||
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
|
||||
|
|
@ -328,14 +338,6 @@ private:
|
|||
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
|
||||
|
||||
UINT_32 ComputeOffsetFromSwizzlePattern(
|
||||
const UINT_64* pPattern,
|
||||
UINT_32 numBits,
|
||||
UINT_32 x,
|
||||
UINT_32 y,
|
||||
UINT_32 z,
|
||||
UINT_32 s) const;
|
||||
|
||||
UINT_32 ComputeOffsetFromEquation(
|
||||
const ADDR_EQUATION* pEq,
|
||||
UINT_32 x,
|
||||
|
|
@ -373,7 +375,7 @@ private:
|
|||
|
||||
VOID GetSwizzlePatternFromPatternInfo(
|
||||
const ADDR_SW_PATINFO* pPatInfo,
|
||||
ADDR_BIT_SETTING (&pSwizzle)[20]) const
|
||||
ADDR_BIT_SETTING (&pSwizzle)[ADDR_MAX_EQUATION_BIT]) const
|
||||
{
|
||||
memcpy(pSwizzle,
|
||||
GFX11_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include "gfx12addrlib.h"
|
||||
#include "gfx12_gb_reg.h"
|
||||
#include "addrswizzler.h"
|
||||
|
||||
#include "amdgpu_asic_addr.h"
|
||||
|
||||
|
|
@ -187,7 +188,7 @@ VOID Gfx12Lib::InitEquationTable()
|
|||
// Skip linear equation (data table is not useful for 2D/3D images-- only contains x-coordinate bits)
|
||||
if (IsValidSwMode(swMode) && (IsLinear(swMode) == false))
|
||||
{
|
||||
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
|
||||
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
|
||||
|
||||
for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
|
||||
{
|
||||
|
|
@ -266,7 +267,7 @@ VOID Gfx12Lib::InitBlockDimensionTable()
|
|||
if (IsValidSwMode(swMode))
|
||||
{
|
||||
surfaceInfo.swizzleMode = swMode;
|
||||
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
|
||||
const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
|
||||
|
||||
for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
|
||||
{
|
||||
|
|
@ -621,10 +622,9 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo(
|
|||
|
||||
// Slices must be exact multiples of the block sizes. However:
|
||||
// - with 3D images, one block will contain multiple slices, so that needs to be taken into account.
|
||||
//
|
||||
// Note that with linear images that have only one slice, we can always guarantee pOut->sliceSize is 256B
|
||||
// alignment so there is no need to worry about it.
|
||||
ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) % GetBlockSize(pSurfInfo->swizzleMode)) == 0);
|
||||
// - with linear images that have only one slice, we may trim and use the pitch alignment for size.
|
||||
ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) %
|
||||
GetBlockSize(pSurfInfo->swizzleMode, CanTrimLinearPadding(pSurfInfo))) == 0);
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
|
|
@ -880,6 +880,231 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx12Lib::HwlCopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
    const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,
    const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
    UINT_32                             regionCount
    ) const
{
    // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
    // optimized for a particular micro-swizzle mode if available.
    ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR3_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->numSamples > 1)
    {
        // TODO: MSAA
        returnCode = ADDR_NOTIMPLEMENTED;
    }

    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width,  1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
    localIn.numSamples   = Max(pIn->numSamples,           1u);

    localOut.size     = sizeof(localOut);
    localOut.pMipInfo = mipInfo;

    if (returnCode == ADDR_OK)
    {
        returnCode = ComputeSurfaceInfo(&localIn, &localOut);
    }

    LutAddresser            addresser        = LutAddresser();
    UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
    if (returnCode == ADDR_OK)
    {
        const UINT_32          blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_SW_PATINFO* pPatInfo    = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                   Log2(pIn->bpp >> 3),
                                                                   pIn->numSamples);

        if (pPatInfo == NULL)
        {
            // No swizzle pattern exists for this combination; GetSwizzlePatternFromPatternInfo would
            // dereference the null pointer.
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
            GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
            addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
            pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
            if (pfnCopyUnaligned == nullptr)
            {
                ADDR_ASSERT_ALWAYS(); // What format is this?
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
    }

    if (returnCode == ADDR_OK)
    {
        for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
        {
            const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];

            // mipId indexes the local mipInfo[] array; reject anything outside the computed mip chain instead
            // of reading past it. Regions preceding the bad one have already been copied at this point.
            if (pCurRegion->mipId >= localIn.numMipLevels)
            {
                returnCode = ADDR_INVALIDPARAMS;
                break;
            }

            const ADDR3_MIP_INFO* pMipInfo  = &mipInfo[pCurRegion->mipId];
            const UINT_64         mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
            const UINT_32         yBlks     = pMipInfo->pitch / localOut.blockExtent.width;

            // Region coordinates are relative to the mip; add the mip's position inside its mip-tail block.
            const UINT_32 xStart     = pCurRegion->x     + pMipInfo->mipTailCoordX;
            const UINT_32 yStart     = pCurRegion->y     + pMipInfo->mipTailCoordY;
            const UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;

            for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
            {
                const UINT_32 slice = sliceStart + sliceIdx;

                // The copy functions take the base address of the hardware slice, not the logical slice. Those are
                // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
                // for unaligned copies.
                const UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
                const UINT_32 sliceXor      = pIn->pbXor ^ addresser.GetAddressZ(slice);

                // The linear buffer is indexed by the slice's position within the region. It must not include
                // mipTailCoordZ, which only applies to the image-side coordinate.
                const UINT_64 memOffset = static_cast<UINT_64>(sliceIdx) * pCurRegion->memSlicePitch;
                const UINT_64 imgOffset = mipOffset + (static_cast<UINT_64>(sliceBlkStart) * localOut.sliceSize);

                ADDR_COORD2D  sliceOrigin = { xStart, yStart };
                ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };

                pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
                                 VoidPtrInc(pCurRegion->pMem, memOffset),
                                 pCurRegion->memRowPitch,
                                 yBlks,
                                 sliceOrigin,
                                 sliceExtent,
                                 sliceXor,
                                 addresser);
            }
        }
    }
    return returnCode;
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx12Lib::HwlCopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Gfx12Lib::HwlCopySurfaceToMem(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount
|
||||
) const
|
||||
{
|
||||
// Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
|
||||
// optimized for a particular micro-swizzle mode if available.
|
||||
ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
|
||||
ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
|
||||
ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if (pIn->numSamples > 1)
|
||||
{
|
||||
// TODO: MSAA
|
||||
returnCode = ADDR_NOTIMPLEMENTED;
|
||||
}
|
||||
|
||||
localIn.size = sizeof(localIn);
|
||||
localIn.flags = pIn->flags;
|
||||
localIn.swizzleMode = pIn->swizzleMode;
|
||||
localIn.resourceType = pIn->resourceType;
|
||||
localIn.format = pIn->format;
|
||||
localIn.bpp = pIn->bpp;
|
||||
localIn.width = Max(pIn->unAlignedDims.width, 1u);
|
||||
localIn.height = Max(pIn->unAlignedDims.height, 1u);
|
||||
localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
|
||||
localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
|
||||
localIn.numSamples = Max(pIn->numSamples, 1u);
|
||||
|
||||
localOut.size = sizeof(localOut);
|
||||
localOut.pMipInfo = mipInfo;
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
returnCode = ComputeSurfaceInfo(&localIn, &localOut);
|
||||
}
|
||||
|
||||
LutAddresser addresser = LutAddresser();
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
|
||||
const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
|
||||
Log2(pIn->bpp >> 3),
|
||||
pIn->numSamples);
|
||||
|
||||
ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
|
||||
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
|
||||
addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
|
||||
pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
|
||||
if (pfnCopyUnaligned == nullptr)
|
||||
{
|
||||
ADDR_ASSERT_ALWAYS(); // What format is this?
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
}
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
|
||||
{
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
|
||||
const ADDR3_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
|
||||
UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
|
||||
UINT_32 yBlks = pMipInfo->pitch / localOut.blockExtent.width;
|
||||
|
||||
UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
|
||||
UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
|
||||
UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
|
||||
|
||||
for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
|
||||
{
|
||||
// The copy functions take the base address of the hardware slice, not the logical slice. Those are
|
||||
// not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
|
||||
// for unaligned copies.
|
||||
UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
|
||||
UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
|
||||
|
||||
UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
|
||||
UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
|
||||
|
||||
ADDR_COORD2D sliceOrigin = { xStart, yStart };
|
||||
ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
|
||||
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
pCurRegion->memRowPitch,
|
||||
yBlks,
|
||||
sliceOrigin,
|
||||
sliceExtent,
|
||||
sliceXor,
|
||||
addresser);
|
||||
}
|
||||
}
|
||||
}
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx12Lib::HwlComputePipeBankXor
|
||||
|
|
@ -1768,43 +1993,36 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlGetPossibleSwizzleModes(
|
|||
pOut->validModes.sw2d64kB = 1;
|
||||
pOut->validModes.sw2d256kB = 1;
|
||||
}
|
||||
// Block-compressed images need to be either using 2D or linear swizzle modes.
|
||||
else if (flags.blockCompressed)
|
||||
// Some APIs (like Vulkan) require that PRT should always use 64KB blocks
|
||||
else if (flags.standardPrt)
|
||||
{
|
||||
pOut->validModes.swLinear = 1;
|
||||
|
||||
// We find cases where Tex3d BlockCompressed image adopts 2D_256B should be prohibited.
|
||||
if (IsTex3d(pIn->resourceType) == FALSE)
|
||||
if (IsTex3d(pIn->resourceType) && (flags.view3dAs2dArray == 0))
|
||||
{
|
||||
pOut->validModes.sw2d256B = 1;
|
||||
pOut->validModes.sw3d64kB = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
pOut->validModes.sw2d64kB = 1;
|
||||
}
|
||||
pOut->validModes.sw2d4kB = 1;
|
||||
pOut->validModes.sw2d64kB = 1;
|
||||
pOut->validModes.sw2d256kB = 1;
|
||||
}
|
||||
else if (IsTex1d(pIn->resourceType))
|
||||
else if (// Block-compressed images need to be either using 2D or linear swizzle modes.
|
||||
flags.blockCompressed ||
|
||||
// Only 3D w/ view3dAs2dArray == 0 will use 1D/2D block swizzle modes
|
||||
(IsTex3d(pIn->resourceType) == FALSE) || flags.view3dAs2dArray ||
|
||||
// NV12 and P010 support
|
||||
// SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
|
||||
// There could be more multimedia formats that require more hw specific tiling modes...
|
||||
flags.nv12 || flags.p010)
|
||||
{
|
||||
pOut->validModes.swLinear = 1;
|
||||
pOut->validModes.sw2d256B = 1;
|
||||
pOut->validModes.sw2d4kB = 1;
|
||||
pOut->validModes.sw2d64kB = 1;
|
||||
pOut->validModes.sw2d256kB = 1;
|
||||
}
|
||||
else if (flags.nv12 || flags.p010 || IsTex2d(pIn->resourceType) || flags.view3dAs2dArray)
|
||||
{
|
||||
// NV12 and P010 support
|
||||
// SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
|
||||
// There could be more multimedia formats that require more hw specific tiling modes...
|
||||
|
||||
// The exception is VRS images.
|
||||
// Linear is not allowed for VRS images.
|
||||
if (flags.isVrsImage == 0)
|
||||
{
|
||||
pOut->validModes.swLinear = 1;
|
||||
}
|
||||
if (flags.view3dAs2dArray == 0)
|
||||
|
||||
// 3D resources can't use SW_256B_2D
|
||||
if (IsTex3d(pIn->resourceType) == FALSE)
|
||||
{
|
||||
// ADDR3_256B_2D can't support 3D images.
|
||||
pOut->validModes.sw2d256B = 1;
|
||||
}
|
||||
pOut->validModes.sw2d4kB = 1;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -162,6 +162,16 @@ private:
|
|||
const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
|
||||
const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlCopyMemToSurface(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const override;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlCopySurfaceToMem(
|
||||
const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const override;
|
||||
|
||||
UINT_32 m_numSwizzleBits;
|
||||
|
||||
// Initialize equation table
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -3834,7 +3834,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
|
|||
}
|
||||
|
||||
// Select the biggest allowed block type
|
||||
minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
|
||||
minSizeBlk = Log2(allowedBlockSet.value) + 1;
|
||||
|
||||
if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
|
||||
{
|
||||
|
|
@ -3960,7 +3960,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
|
|||
// Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
|
||||
// type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
|
||||
// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
|
||||
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
|
||||
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
|
||||
}
|
||||
|
||||
returnCode = ADDR_OK;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -68,11 +68,10 @@ struct SiChipSettings
|
|||
UINT_32 isPolaris10 : 1;
|
||||
UINT_32 isPolaris11 : 1;
|
||||
UINT_32 isPolaris12 : 1;
|
||||
// VI fusion
|
||||
UINT_32 isVegaM : 1;
|
||||
UINT_32 isCarrizo : 1;
|
||||
|
||||
UINT_32 : 2;
|
||||
UINT_32 : 1;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -3205,7 +3205,6 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo
|
|||
struct radeon_surf *surf)
|
||||
{
|
||||
bool compressed = surf->blk_w == 4 && surf->blk_h == 4;
|
||||
bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
|
||||
bool stencil_only = (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
|
||||
ADDR3_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
|
||||
|
||||
|
|
@ -3220,13 +3219,11 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo
|
|||
AddrSurfInfoIn.bpp = surf->bpe * 8;
|
||||
}
|
||||
|
||||
AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
|
||||
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
|
||||
AddrSurfInfoIn.flags.depth = !!(surf->flags & RADEON_SURF_ZBUFFER);
|
||||
AddrSurfInfoIn.flags.stencil = stencil_only;
|
||||
AddrSurfInfoIn.flags.texture = !(surf->flags & RADEON_SURF_NO_TEXTURE);
|
||||
AddrSurfInfoIn.flags.unordered = !(surf->flags & RADEON_SURF_NO_TEXTURE);
|
||||
AddrSurfInfoIn.flags.blockCompressed = compressed;
|
||||
AddrSurfInfoIn.flags.isVrsImage = !!(surf->flags & RADEON_SURF_VRS_RATE);
|
||||
AddrSurfInfoIn.flags.standardPrt = !!(surf->flags & RADEON_SURF_PRT);
|
||||
|
||||
if (config->is_3d)
|
||||
AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
|
||||
|
|
@ -3254,11 +3251,6 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo
|
|||
AddrSurfInfoIn.swizzleMode = ac_get_modifier_swizzle_mode(info->gfx_level, surf->modifier);
|
||||
} else if (surf->flags & RADEON_SURF_IMPORTED) {
|
||||
AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
|
||||
} else if (surf->flags & RADEON_SURF_PRT) {
|
||||
if (config->is_3d)
|
||||
AddrSurfInfoIn.swizzleMode = ADDR3_64KB_3D;
|
||||
else
|
||||
AddrSurfInfoIn.swizzleMode = ADDR3_64KB_2D;
|
||||
} else if (mode == RADEON_SURF_MODE_LINEAR_ALIGNED) {
|
||||
assert(config->info.samples <= 1 && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
|
||||
AddrSurfInfoIn.swizzleMode = ADDR3_LINEAR;
|
||||
|
|
|
|||
|
|
@ -221,8 +221,6 @@ static void gfx12_generate_hash(struct ac_addrlib *ac_addrlib,
|
|||
ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input = {0};
|
||||
input.size = sizeof(input);
|
||||
input.swizzleMode = surf->u.gfx9.swizzle_mode;
|
||||
input.flags.color = 1;
|
||||
input.flags.texture = 1;
|
||||
input.resourceType = ADDR_RSRC_TEX_2D;
|
||||
input.bpp = util_format_get_blocksizebits(entry->format);
|
||||
input.unAlignedDims.width = entry->w;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue