mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
amd: import gfx11.7 addrlib
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40958>
This commit is contained in:
parent
dfca417db8
commit
d778ede72c
20 changed files with 3407 additions and 509 deletions
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -24,7 +24,7 @@ extern "C"
|
|||
#endif
|
||||
|
||||
#define ADDRLIB_VERSION_MAJOR 10
|
||||
#define ADDRLIB_VERSION_MINOR 1
|
||||
#define ADDRLIB_VERSION_MINOR 6
|
||||
/// Packs a version pair into one integer: major in bits 16 and up, minor in the low bits.
/// Both arguments are parenthesized so that expression arguments expand with the intended precedence.
#define ADDRLIB_MAKE_VERSION(major, minor) (((major) << 16) | (minor))
|
||||
#define ADDRLIB_VERSION ADDRLIB_MAKE_VERSION(ADDRLIB_VERSION_MAJOR, ADDRLIB_VERSION_MINOR)
|
||||
|
||||
|
|
@ -107,6 +107,11 @@ typedef struct _ADDR_EXTENT3D
|
|||
* AddrComputeFmaskAddrFromCoord()
|
||||
* AddrComputeFmaskCoordFromAddr()
|
||||
*
|
||||
* /////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
* // Format properties functions
|
||||
* /////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
* AddrFormatProperties()
|
||||
*
|
||||
**/
|
||||
/**
|
||||
* /////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -452,6 +457,49 @@ ADDR_E_RETURNCODE ADDR_API AddrCreate(
|
|||
ADDR_E_RETURNCODE ADDR_API AddrDestroy(
|
||||
ADDR_HANDLE hLib);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR_FORMAT_PROPERTIES_IN
|
||||
*
|
||||
* @brief
|
||||
* Input structure to the AddrFormatProperties routine.
|
||||
*
|
||||
****************************************************************************************************
|
||||
*/
|
||||
typedef struct _ADDR_FORMAT_PROPERTIES_IN {
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
AddrFormat format; ///< Format to query; its bpp and macro pixel block
|
||||
/// dimensions are returned in the output structure
|
||||
} ADDR_FORMAT_PROPERTIES_IN;
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR_FORMAT_PROPERTIES_OUT
|
||||
*
|
||||
* @brief
|
||||
* Output structure from the AddrFormatProperties routine.
|
||||
*
|
||||
****************************************************************************************************
|
||||
*/
|
||||
typedef struct _ADDR_FORMAT_PROPERTIES_OUT {
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
UINT_32 bpp; ///< Bits per pixel as laid out in memory (eg. 128bpp for BC7)
|
||||
ADDR_EXTENT2D expand; ///< Dimensions of one macro pixel block
|
||||
} ADDR_FORMAT_PROPERTIES_OUT;
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* AddrFormatProperties
|
||||
*
|
||||
* @brief
|
||||
* Gets the properties (bpp and macro pixel block dimensions) of the specified format
|
||||
*
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API AddrFormatProperties(
|
||||
ADDR_HANDLE hLib,
|
||||
const ADDR_FORMAT_PROPERTIES_IN* in,
|
||||
ADDR_FORMAT_PROPERTIES_OUT* pOut);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Surface functions
|
||||
|
|
@ -2463,6 +2511,7 @@ typedef union _ADDR2_SURFACE_FLAGS
|
|||
UINT_32 rotated : 1; ///< This resource is rotated and displayable
|
||||
UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible
|
||||
UINT_32 opt4space : 1; ///< This resource should be optimized for space
|
||||
UINT_32 computeMaxSize : 1; ///< This resource should select the largest swizzle possible
|
||||
UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment
|
||||
UINT_32 noMetadata : 1; ///< This resource has no metadata
|
||||
UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata
|
||||
|
|
@ -2470,7 +2519,7 @@ typedef union _ADDR2_SURFACE_FLAGS
|
|||
UINT_32 view3dAs2dArray : 1; ///< This resource is a 3D resource viewed as 2D array
|
||||
UINT_32 allowExtEquation : 1; ///< If unset, only legacy DX eqs are allowed (2 XORs)
|
||||
UINT_32 requireMetadata : 1; ///< This resource must support metadata
|
||||
UINT_32 reserved : 11; ///< Reserved bits
|
||||
UINT_32 reserved : 10; ///< Reserved bits
|
||||
};
|
||||
|
||||
UINT_32 value;
|
||||
|
|
@ -2666,6 +2715,31 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord(
|
|||
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
|
||||
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut);
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR_COPY_FLAGS
|
||||
*
|
||||
* @brief
|
||||
* Options controlling image copy functions.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
typedef union _ADDR_COPY_FLAGS {
|
||||
struct
|
||||
{
|
||||
UINT_32 blockMemcpy : 1; ///< Memory layout is pre-swizzled and stored block-by-block.
|
||||
/// For regions in the miptail, this uses hybrid memcpy.
|
||||
/// Regions must cover full width/height of the subresource.
|
||||
UINT_32 hybridMemcpy : 1; ///< Memory layout is partially pre-swizzled and stored
|
||||
/// microblock-by-microblock. Data in this format is agnostic to
|
||||
/// chip harvesting and block size. Regions will be padded out
|
||||
/// to microblock boundaries for alignment.
|
||||
/// Mutually exclusive with 'blockMemcpy'.
|
||||
UINT_32 reserved : 30; ///< Reserved bits
|
||||
};
|
||||
|
||||
UINT_32 value;
|
||||
} ADDR_COPY_FLAGS;
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ADDR2_COPY_MEMSURFACE_REGION
|
||||
|
|
@ -2718,6 +2792,7 @@ typedef struct _ADDR2_COPY_MEMSURFACE_INPUT
|
|||
/// - copyDims.depth == 1
|
||||
/// - all copy regions target the same mip
|
||||
/// - all copy regions target the same slice/depth
|
||||
ADDR_COPY_FLAGS copyFlags; ///< Controls how the copy is performed.
|
||||
} ADDR2_COPY_MEMSURFACE_INPUT;
|
||||
|
||||
/**
|
||||
|
|
@ -4008,30 +4083,34 @@ typedef union _ADDR2_SWMODE_SET
|
|||
*/
|
||||
typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
|
||||
{
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
UINT_32 size; ///< Size of this structure in bytes
|
||||
|
||||
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
|
||||
AddrResourceType resourceType; ///< Surface type
|
||||
AddrFormat format; ///< Surface format
|
||||
AddrResrouceLocation resourceLoction; ///< Surface heap choice
|
||||
ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting
|
||||
///< such as linear for DXTn, tiled for YUV
|
||||
ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted
|
||||
BOOL_32 noXor; ///< Do not use xor mode for this resource
|
||||
UINT_32 bpp; ///< bits per pixel
|
||||
UINT_32 width; ///< Width (of mip0), in pixels
|
||||
UINT_32 height; ///< Height (of mip0), in pixels
|
||||
UINT_32 numSlices; ///< Number surface slice/depth (of mip0),
|
||||
UINT_32 numMipLevels; ///< Total mipmap levels.
|
||||
UINT_32 numSamples; ///< Number of samples
|
||||
UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as
|
||||
/// number of samples for normal AA; Set it to the
|
||||
/// number of fragments for EQAA
|
||||
UINT_32 maxAlign; ///< maximum base/size alignment requested by client
|
||||
UINT_32 minSizeAlign; ///< memory allocated for surface in client driver will
|
||||
/// be padded to multiple of this value (in bytes)
|
||||
DOUBLE memoryBudget; ///< Memory consumption ratio based on minimum possible
|
||||
/// size.
|
||||
ADDR2_SURFACE_FLAGS flags; ///< Surface flags
|
||||
AddrResourceType resourceType; ///< Surface type
|
||||
AddrFormat format; ///< Surface format
|
||||
AddrResrouceLocation resourceLoction; ///< Surface heap choice
|
||||
ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting
|
||||
///< such as linear for DXTn, tiled for YUV
|
||||
ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted
|
||||
BOOL_32 noXor; ///< Do not use xor mode for this resource
|
||||
UINT_32 bpp; ///< bits per pixel
|
||||
UINT_32 width; ///< Width (of mip0), in pixels
|
||||
UINT_32 height; ///< Height (of mip0), in pixels
|
||||
UINT_32 numSlices; ///< Number surface slice/depth (of mip0),
|
||||
UINT_32 numMipLevels; ///< Total mipmap levels.
|
||||
UINT_32 numSamples; ///< Number of samples
|
||||
UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as
|
||||
/// number of samples for normal AA; Set it to the
|
||||
/// number of fragments for EQAA
|
||||
UINT_32 maxAlign; ///< maximum base/size alignment requested by client
|
||||
UINT_32 minSizeAlign; ///< memory allocated for surface in client driver will
|
||||
/// be padded to multiple of this value (in bytes)
|
||||
DOUBLE memoryBudget; ///< Memory consumption ratio based on minimum possible
|
||||
/// size.
|
||||
bool useBlockBasedHeuristic; ///< Use the block-based heuristic for swizzle mode selection.
|
||||
/// The heuristic makes the image size scale predictably
|
||||
/// with image extents, which is needed for Vulkan. It ignores
|
||||
/// minSizeAlign, maxAlign and memoryBudget options
|
||||
} ADDR2_GET_PREFERRED_SURF_SETTING_INPUT;
|
||||
|
||||
/**
|
||||
|
|
@ -4488,6 +4567,7 @@ typedef struct _ADDR3_COPY_MEMSURFACE_INPUT
|
|||
/// - copyDims.depth == 1
|
||||
/// - all copy regions target the same mip
|
||||
/// - all copy regions target the same slice/depth
|
||||
ADDR_COPY_FLAGS copyFlags; ///< Controls how the copy is performed.
|
||||
} ADDR3_COPY_MEMSURFACE_INPUT;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ files_addrlib = files(
|
|||
'src/core/addrobject.h',
|
||||
'src/core/addrswizzler.cpp',
|
||||
'src/core/addrswizzler.h',
|
||||
'src/core/addrswizzlersimd.h',
|
||||
'src/core/coord.cpp',
|
||||
'src/core/coord.h',
|
||||
'src/gfx9/gfx9addrlib.cpp',
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -81,6 +81,34 @@ ADDR_E_RETURNCODE ADDR_API AddrDestroy(
|
|||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* AddrFormatProperties
|
||||
*
|
||||
* @brief
|
||||
* Retrieves properties of the specified format.
|
||||
*
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE ADDR_API AddrFormatProperties(
    ADDR_HANDLE                      hLib, ///< [in] address lib handle
    const ADDR_FORMAT_PROPERTIES_IN* in,   ///< [in] format to query
    ADDR_FORMAT_PROPERTIES_OUT*      pOut) ///< [out] properties of the format
{
    // The parameter list matches the public declaration, which takes the input structure by
    // pointer. Any missing argument, or a handle that does not resolve to a library object,
    // is reported as ADDR_INVALIDPARAMS.
    ADDR_E_RETURNCODE retCode = ADDR_INVALIDPARAMS;

    if ((hLib != NULL) && (in != NULL) && (pOut != NULL))
    {
        Lib* pLib = Lib::GetLib(hLib);

        if (pLib != NULL)
        {
            // The library-side query takes the input structure by reference.
            retCode = pLib->GetFormatProperties(*in, pOut);
        }
    }

    return retCode;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Surface functions
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2017-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -29,6 +29,7 @@
|
|||
#define FAMILY_NV3 0x91 //# 145 / Navi: 3x
|
||||
#define FAMILY_STX 0x96
|
||||
#define FAMILY_PHX 0x94 //# 148 / Phoenix
|
||||
#define FAMILY_GFX1170 0x9A
|
||||
#define FAMILY_RMB 0x92 //# 146 / Rembrandt
|
||||
#define FAMILY_RPL 0x95 //# 149 / Raphael
|
||||
#define FAMILY_MDN 0x97 //# 151 / Mendocino
|
||||
|
|
@ -109,6 +110,7 @@
|
|||
#define AMDGPU_PHOENIX2_RANGE 0x80, 0xC0 //# 128 <= x < 192
|
||||
#define AMDGPU_HAWK_POINT1_RANGE 0xC0, 0xF0 //# 192 <= x < 240
|
||||
#define AMDGPU_HAWK_POINT2_RANGE 0xF0, 0xFF //# 240 <= x < 255
|
||||
#define AMDGPU_GFX1170_RANGE 0x01, 0x40 //# 1 <= x < 64
|
||||
|
||||
#define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF //# 01 <= x < 255
|
||||
#define AMDGPU_RAPHAEL_RANGE 0x01, 0xFF //# 1 <= x < max
|
||||
|
|
@ -189,6 +191,7 @@
|
|||
#define ASICREV_IS_PHOENIX2(r) ASICREV_IS(r, PHOENIX2)
|
||||
#define ASICREV_IS_HAWK_POINT1(r) ASICREV_IS(r, HAWK_POINT1)
|
||||
#define ASICREV_IS_HAWK_POINT2(r) ASICREV_IS(r, HAWK_POINT2)
|
||||
#define ASICREV_IS_GFX1170(r) ASICREV_IS(r, GFX1170)
|
||||
|
||||
#define ASICREV_IS_REMBRANDT(r) ASICREV_IS(r, REMBRANDT)
|
||||
#define ASICREV_IS_RAPHAEL(r) ASICREV_IS(r, RAPHAEL)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -360,7 +360,7 @@ static inline UINT_32 BitMaskScanForward(
|
|||
{
|
||||
ADDR_ASSERT(mask > 0);
|
||||
unsigned long out = 0;
|
||||
#if (defined(_WIN64) && defined(_M_X64)) || (defined(_WIN32) && defined(_M_IX64))
|
||||
#if ((defined(_WIN64) && defined(_M_X64)) || (defined(_WIN32) && defined(_M_IX64))) && !defined(_M_ARM64EC)
|
||||
out = ::_tzcnt_u32(mask);
|
||||
#elif (defined(_WIN32) || defined(_WIN64))
|
||||
::_BitScanForward(&out, mask);
|
||||
|
|
@ -436,6 +436,22 @@ static inline UINT_64 IsPow2(
|
|||
return !(dim & (dim - 1));
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* RoundUpToMultiple
|
||||
*
|
||||
* @brief
|
||||
* Rounds up the specified integer to the nearest multiple of the specified alignment value.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <typename T>
constexpr T RoundUpToMultiple(
    T operand,   ///< Value to be aligned.
    T alignment) ///< Alignment desired.
{
    // Push the operand to just below the next multiple, then drop the remainder by dividing
    // and multiplying back. 'auto' keeps the intermediate results in the same (possibly
    // promoted) arithmetic type a single expression would use.
    const auto biasedOperand  = operand + (alignment - 1);
    const auto wholeMultiples = biasedOperand / alignment;

    return wholeMultiples * alignment;
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* PowTwoAlign
|
||||
|
|
@ -647,6 +663,25 @@ static inline UINT_32 Log2(
|
|||
return (x != 0) ? (31 ^ BitMaskScanReverse(x)) : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* ConstexprLog2
|
||||
*
|
||||
* @brief
|
||||
* Computes floor(log2(x)) whether or not x is a power of 2. Returns 0 if x is 0.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
static constexpr inline UINT_32 ConstexprLog2(
    UINT_32 x) ///< [in] value whose base-2 logarithm is computed
{
    // Count how many single-bit right shifts it takes for x to reach zero, not counting the
    // shift that drops the last set bit. Inputs of 0 and 1 both yield 0.
    UINT_32 shiftCount = 0;

    for (x >>= 1; x != 0; x >>= 1)
    {
        ++shiftCount;
    }

    return shiftCount;
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* QLog2
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -222,6 +222,7 @@ ADDR_E_RETURNCODE Lib::Create(
|
|||
case FAMILY_NV3:
|
||||
case FAMILY_STX:
|
||||
case FAMILY_PHX:
|
||||
case FAMILY_GFX1170:
|
||||
pLib = Gfx11HwlInit(&client);
|
||||
break;
|
||||
case FAMILY_NV4:
|
||||
|
|
@ -304,6 +305,44 @@ ADDR_E_RETURNCODE Lib::Create(
|
|||
return returnCode;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Lib::GetFormatProperties
|
||||
*
|
||||
* @brief
|
||||
* Returns the properties of the format as specified in the input.
|
||||
* @return
|
||||
* ADDR_E_RETURNCODE
|
||||
****************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Lib::GetFormatProperties(
    const ADDR_FORMAT_PROPERTIES_IN& in,   ///< [in] format to query
    ADDR_FORMAT_PROPERTIES_OUT*      pOut  ///< [out] bpp and macro pixel block dimensions
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pOut == nullptr)
    {
        // pOut is dereferenced by both the size check and the query below.
        returnCode = ADDR_INVALIDPARAMS;
    }
    else if (GetFillSizeFieldsFlags() == TRUE)
    {
        // Clients that fill in the size fields must pass structures of exactly the expected size.
        if ((in.size != sizeof(ADDR_FORMAT_PROPERTIES_IN)) ||
            (pOut->size != sizeof(ADDR_FORMAT_PROPERTIES_OUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    if (returnCode == ADDR_OK)
    {
        // The element library reports the bpp as the return value and writes the block
        // dimensions through the width/height pointers.
        pOut->bpp = GetElemLib()->GetBitsPerPixel(in.format,
                                                  nullptr, // elemMode, unused
                                                  &pOut->expand.width,
                                                  &pOut->expand.height,
                                                  nullptr); // unused bits
    }

    return returnCode;
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Lib::SetChipFamily
|
||||
|
|
@ -315,7 +354,7 @@ ADDR_E_RETURNCODE Lib::Create(
|
|||
****************************************************************************************************
|
||||
*/
|
||||
VOID Lib::SetChipFamily(
|
||||
UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
|
||||
UINT_32 uChipFamily, ///< [in] chip family defined in atiid.h
|
||||
UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
|
||||
{
|
||||
ChipFamily family = HwlConvertChipFamily(uChipFamily, uChipRevision);
|
||||
|
|
@ -668,6 +707,47 @@ UINT_32 Lib::GetBpe(AddrFormat format) const
|
|||
return GetElemLib()->GetBitsPerPixel(format);
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Lib::GetSwizzleModePreferenceRatio
|
||||
*
|
||||
* @brief
|
||||
* Get ratio driving swizzle mode selection heuristic. Ratio is returned as fraction numerator
|
||||
* and denominator
|
||||
* @return
|
||||
* void
|
||||
****************************************************************************************************
|
||||
*/
|
||||
void Lib::GetSwizzleModePreferenceRatio(
|
||||
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
|
||||
UINT_32* pOutRatioLo,
|
||||
UINT_32* pOutRatioHi
|
||||
) const
|
||||
{
|
||||
const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
|
||||
|
||||
if (computeMinSize)
|
||||
{
|
||||
*pOutRatioLo = 1;
|
||||
*pOutRatioHi = 1;
|
||||
}
|
||||
else if (pIn->flags.opt4space)
|
||||
{
|
||||
*pOutRatioLo = 3;
|
||||
*pOutRatioHi = 2;
|
||||
}
|
||||
else if (pIn->flags.computeMaxSize)
|
||||
{
|
||||
*pOutRatioLo = 1024;
|
||||
*pOutRatioHi = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pOutRatioLo = 2;
|
||||
*pOutRatioHi = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::ComputeOffsetFromSwizzlePattern
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -300,6 +300,10 @@ public:
|
|||
delete this;
|
||||
}
|
||||
|
||||
ADDR_E_RETURNCODE GetFormatProperties(
|
||||
const ADDR_FORMAT_PROPERTIES_IN& in,
|
||||
ADDR_FORMAT_PROPERTIES_OUT* pOut) const;
|
||||
|
||||
static Lib* GetLib(ADDR_HANDLE hLib);
|
||||
|
||||
/// Returns which version of addrlib functions should be used.
|
||||
|
|
@ -333,6 +337,10 @@ public:
|
|||
|
||||
UINT_32 GetBpe(AddrFormat format) const;
|
||||
|
||||
void GetSwizzleModePreferenceRatio(
|
||||
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
|
||||
UINT_32* pOutRatioLo,
|
||||
UINT_32* pOutRatioHi) const;
|
||||
|
||||
static UINT_32 ComputeOffsetFromSwizzlePattern(
|
||||
const UINT_64* pPattern,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -2473,7 +2473,7 @@ UINT_64 Lib::HwlComputeXmaskAddrFromCoord(
|
|||
//
|
||||
macroTileIndexX = x / macroTileWidth;
|
||||
macroTileIndexY = y / macroTileHeight;
|
||||
macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes;
|
||||
macroTileOffset = (static_cast<UINT_64>(macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes;
|
||||
|
||||
//
|
||||
// Compute the pixel offset within the macro tile.
|
||||
|
|
@ -2675,7 +2675,7 @@ VOID Lib::ComputeSurfaceCoordFromAddrMicroTiled(
|
|||
//
|
||||
sliceBits = static_cast<UINT_64>(pitch) * height * microTileThickness * bpp * numSamples;
|
||||
|
||||
rowBits = (pitch / MicroTileWidth) * microTileBits;
|
||||
rowBits = static_cast<UINT_64>(pitch / MicroTileWidth) * microTileBits;
|
||||
|
||||
//
|
||||
// Extract the slice index.
|
||||
|
|
@ -3559,11 +3559,11 @@ BOOL_32 Lib::DegradeTo1D(
|
|||
if (degrade == FALSE)
|
||||
{
|
||||
// Only check width and height as slices are aligned to thickness
|
||||
UINT_64 unalignedSize = width * height;
|
||||
UINT_64 unalignedSize = static_cast<UINT_64>(width) * height;
|
||||
|
||||
UINT_32 alignedPitch = PowTwoAlign(width, macroTilePitchAlign);
|
||||
UINT_32 alignedHeight = PowTwoAlign(height, macroTileHeightAlign);
|
||||
UINT_64 alignedSize = alignedPitch * alignedHeight;
|
||||
UINT_64 alignedSize = static_cast<UINT_64>(alignedPitch) * alignedHeight;
|
||||
|
||||
// alignedSize > 1.5 * unalignedSize
|
||||
if (2 * alignedSize > 3 * unalignedSize)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -207,7 +207,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
|
|||
// Overwrite these parameters if we have a valid format
|
||||
}
|
||||
|
||||
if (localIn.bpp != 0)
|
||||
if (localIn.bpp >= 8)
|
||||
{
|
||||
localIn.width = Max(localIn.width, 1u);
|
||||
localIn.height = Max(localIn.height, 1u);
|
||||
|
|
@ -444,8 +444,8 @@ ADDR_E_RETURNCODE Lib::CopyLinearSurface(
|
|||
void* pMipBase = VoidPtrInc(pIn->pMappedSurface,
|
||||
(pIn->singleSubres ? 0 : mipInfo[pCurRegion->mipId].offset));
|
||||
|
||||
const size_t lineSizeBytes = (localIn.bpp >> 3) * pCurRegion->copyDims.width;
|
||||
const size_t lineImgPitchBytes = (localIn.bpp >> 3) * mipInfo[pCurRegion->mipId].pitch;
|
||||
const size_t lineSizeBytes = (static_cast<size_t>(localIn.bpp) >> 3) * pCurRegion->copyDims.width;
|
||||
const size_t lineImgPitchBytes = (static_cast<size_t>(localIn.bpp) >> 3) * mipInfo[pCurRegion->mipId].pitch;
|
||||
|
||||
for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
|
||||
{
|
||||
|
|
@ -504,6 +504,11 @@ ADDR_E_RETURNCODE Lib::CopyMemToSurface(
|
|||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (pIn->copyFlags.blockMemcpy && pIn->copyFlags.hybridMemcpy)
|
||||
{
|
||||
// Invalid to specify conflicting copy modes.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
|
|
@ -573,6 +578,11 @@ ADDR_E_RETURNCODE Lib::CopySurfaceToMem(
|
|||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (pIn->copyFlags.blockMemcpy && pIn->copyFlags.hybridMemcpy)
|
||||
{
|
||||
// Invalid to specify conflicting copy modes.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
|
|
@ -1424,7 +1434,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear(
|
|||
{
|
||||
pOut->addr = (localOut.sliceSize * pIn->slice) +
|
||||
mipInfo[pIn->mipId].offset +
|
||||
(pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3);
|
||||
(static_cast<size_t>(pIn->y) * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3);
|
||||
pOut->bitPosition = 0;
|
||||
}
|
||||
else
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -39,8 +39,6 @@ namespace V3
|
|||
Lib::Lib()
|
||||
:
|
||||
Addr::Lib(),
|
||||
m_pipesLog2(0),
|
||||
m_pipeInterleaveLog2(0),
|
||||
m_numEquations(0)
|
||||
{
|
||||
Init();
|
||||
|
|
@ -59,8 +57,6 @@ Lib::Lib(
|
|||
const Client* pClient)
|
||||
:
|
||||
Addr::Lib(pClient),
|
||||
m_pipesLog2(0),
|
||||
m_pipeInterleaveLog2(0),
|
||||
m_numEquations(0)
|
||||
{
|
||||
Init();
|
||||
|
|
@ -265,7 +261,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
|
|||
// Overwrite these parameters if we have a valid format
|
||||
}
|
||||
|
||||
if (localIn.bpp != 0)
|
||||
if (localIn.bpp >= 8)
|
||||
{
|
||||
localIn.width = Max(localIn.width, 1u);
|
||||
localIn.height = Max(localIn.height, 1u);
|
||||
|
|
@ -547,8 +543,8 @@ ADDR_E_RETURNCODE Lib::CopyLinearSurface(
|
|||
void* pMipBase = VoidPtrInc(pIn->pMappedSurface,
|
||||
(pIn->singleSubres ? 0 : mipInfo[pCurRegion->mipId].offset));
|
||||
|
||||
const size_t lineSizeBytes = (localIn.bpp >> 3) * pCurRegion->copyDims.width;
|
||||
const size_t lineImgPitchBytes = (localIn.bpp >> 3) * mipInfo[pCurRegion->mipId].pitch;
|
||||
const size_t lineSizeBytes = (static_cast<size_t>(localIn.bpp) >> 3) * pCurRegion->copyDims.width;
|
||||
const size_t lineImgPitchBytes = (static_cast<size_t>(localIn.bpp) >> 3) * mipInfo[pCurRegion->mipId].pitch;
|
||||
|
||||
for (UINT_32 sliceIdx = 0; sliceIdx < pCurRegion->copyDims.depth; sliceIdx++)
|
||||
{
|
||||
|
|
@ -611,6 +607,11 @@ ADDR_E_RETURNCODE Lib::CopyMemToSurface(
|
|||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (pIn->copyFlags.blockMemcpy && pIn->copyFlags.hybridMemcpy)
|
||||
{
|
||||
// Invalid to specify conflicting copy modes.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
|
|
@ -680,6 +681,11 @@ ADDR_E_RETURNCODE Lib::CopySurfaceToMem(
|
|||
{
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else if (pIn->copyFlags.blockMemcpy && pIn->copyFlags.hybridMemcpy)
|
||||
{
|
||||
// Invalid to specify conflicting copy modes.
|
||||
returnCode = ADDR_INVALIDPARAMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
UINT_32 baseSlice = pRegions[0].slice;
|
||||
|
|
@ -736,7 +742,7 @@ ADDR_E_RETURNCODE Lib::ComputePipeBankXor(
|
|||
const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
|
||||
ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut)
|
||||
{
|
||||
ADDR_E_RETURNCODE returnCode;
|
||||
ADDR_E_RETURNCODE returnCode = ADDR_OK;
|
||||
|
||||
if ((GetFillSizeFieldsFlags() == TRUE) &&
|
||||
((pIn->size != sizeof(ADDR3_COMPUTE_PIPEBANKXOR_INPUT)) ||
|
||||
|
|
@ -746,7 +752,23 @@ ADDR_E_RETURNCODE Lib::ComputePipeBankXor(
|
|||
}
|
||||
else
|
||||
{
|
||||
returnCode = HwlComputePipeBankXor(pIn, pOut);
|
||||
// The swizzle mode determines how many unused bits there are in the address. We never (ok, rarely...) program
|
||||
// the low eight bits of the address, so the "numSwizzleBits" effectively represents the number of "guaranteed
|
||||
// zero" programmed bits in the address.
|
||||
const UINT_32 numSwizzleBits = GetBlockSizeLog2(pIn->swizzleMode, FALSE) - 8;
|
||||
|
||||
// make sure this configuration supports swizzling
|
||||
if (numSwizzleBits != 0)
|
||||
{
|
||||
// These cases should have been excluded with the "numSwizzleBits" calculation above, but make sure here.
|
||||
ADDR_ASSERT((IsLinear(pIn->swizzleMode) == FALSE) && (IsBlock256b(pIn->swizzleMode) == FALSE));
|
||||
|
||||
pOut->pipeBankXor = pIn->surfIndex % (1 << numSwizzleBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
pOut->pipeBankXor = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
|
|
@ -1167,7 +1189,6 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck(
|
|||
return HwlValidateNonSwModeParams(&localIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Lib::ComputeOffsetFromEquation
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2022-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -44,7 +44,6 @@ struct ADDR3_COORD
|
|||
struct ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT
|
||||
{
|
||||
const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo;
|
||||
void* pvAddrParams;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -155,14 +154,16 @@ protected:
|
|||
Lib(); // Constructor is protected
|
||||
Lib(const Client* pClient);
|
||||
|
||||
UINT_32 m_pipesLog2; ///< Number of pipe per shader engine Log2
|
||||
UINT_32 m_pipeInterleaveLog2; ///< Log2 of pipe interleave bytes
|
||||
|
||||
SwizzleModeFlags m_swizzleModeTable[ADDR3_MAX_TYPE]; ///< Swizzle mode table
|
||||
|
||||
// Number of unique MSAA sample rates (1/2/4/8)
|
||||
static const UINT_32 MaxNumMsaaRates = 4;
|
||||
|
||||
//# These fields exist in the GB_ADDR_CONFIG register; however, the HW does not care about them.
|
||||
//# The HW acts as if the log2(pipes)==5 and log2(pi) == 8, always.
|
||||
static const UINT_32 NumPipesLog2 = 5;
|
||||
static const UINT_32 PipeInterleaveLog2 = 8;
|
||||
|
||||
// Number of equation entries in the table
|
||||
UINT_32 m_numEquations;
|
||||
|
||||
|
|
@ -444,4 +445,4 @@ private:
|
|||
} // V3
|
||||
} // Addr
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2024-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
||||
|
|
@ -14,6 +15,7 @@
|
|||
*/
|
||||
|
||||
#include "addrswizzler.h"
|
||||
#include "addrswizzlersimd.h"
|
||||
|
||||
namespace Addr
|
||||
{
|
||||
|
|
@ -98,6 +100,23 @@ void LutAddresser::InitSwizzleProps()
|
|||
m_sLutMask |= m_bit[i].s;
|
||||
}
|
||||
|
||||
// Derive the microblock size from the swizzle equation.
|
||||
UINT_32 xMbMask = 0;
|
||||
UINT_32 yMbMask = 0;
|
||||
UINT_32 zMbMask = 0;
|
||||
for (UINT_32 i = 0; i < 8; i++)
|
||||
{
|
||||
xMbMask |= m_bit[i].x;
|
||||
yMbMask |= m_bit[i].y;
|
||||
zMbMask |= m_bit[i].z;
|
||||
}
|
||||
m_microBlockSize.width = xMbMask + 1;
|
||||
m_microBlockSize.height = yMbMask + 1;
|
||||
m_microBlockSize.depth = zMbMask + 1;
|
||||
ADDR_ASSERT(IsPow2(m_microBlockSize.width));
|
||||
ADDR_ASSERT(IsPow2(m_microBlockSize.height));
|
||||
ADDR_ASSERT(IsPow2(m_microBlockSize.depth));
|
||||
|
||||
// An expandX of 1 is a no-op
|
||||
m_maxExpandX = 1;
|
||||
if (m_sLutMask == 0)
|
||||
|
|
@ -153,7 +172,7 @@ void LutAddresser::InitLuts()
|
|||
m_pYLut = &m_lutData[0];
|
||||
ADDR_ASSERT(m_pYLut[0] == 0);
|
||||
}
|
||||
|
||||
|
||||
if (m_zLutMask != 0)
|
||||
{
|
||||
m_pZLut = &m_lutData[curOffset];
|
||||
|
|
@ -269,82 +288,33 @@ UINT_32 LutAddresser::EvalEquation(
|
|||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* Copy2DSliceUnaligned
|
||||
* CopyRowUnaligned
|
||||
*
|
||||
* @brief
|
||||
* Copies an arbitrary 2D pixel region to or from a surface.
|
||||
* Copies a single row to or from a surface.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <int BPELog2, int ExpandX, bool ImgIsDest>
|
||||
void Copy2DSliceUnaligned(
|
||||
void* pImgBlockSliceStart, // Block corresponding to beginning of slice
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
ADDR_COORD2D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT2D extent, // Size to copy, in elements
|
||||
UINT_32 sliceXor, // Includes pipeBankXor and z XOR
|
||||
void CopyRowUnaligned(
|
||||
void* pRowImgBlockStart, // Pointer to the image block at x=0
|
||||
void* pBuf, // Pointer to data at x=0
|
||||
UINT_32 xStart, // x value to start at
|
||||
UINT_32 xEnd, // x value to finish at (not inclusive)
|
||||
UINT_32 rowXor, // Value to XOR in for each address (makes up PBX and y/z coords)
|
||||
const LutAddresser& addresser)
|
||||
{
|
||||
UINT_32 xStart = origin.x;
|
||||
UINT_32 xEnd = origin.x + extent.width;
|
||||
|
||||
UINT_32 x = xStart;
|
||||
constexpr UINT_32 PixBytes = (1 << BPELog2);
|
||||
|
||||
// Apply a negative offset now so later code can do eg. pBuf[x] instead of pBuf[x - origin.x]
|
||||
pBuf = VoidPtrDec(pBuf, xStart * PixBytes);
|
||||
|
||||
// Do things one row at a time for unaligned regions.
|
||||
for (UINT_32 y = origin.y; y < (origin.y + extent.height); y++)
|
||||
// Most swizzles pack 2-4 pixels horizontally. Take advantage of this even in non-microblock-aligned
|
||||
// regions to commonly do 2-4x less work. This is still way less good than copying by whole microblocks though.
|
||||
if (ExpandX > 1)
|
||||
{
|
||||
UINT_32 yBlk = (y >> addresser.GetBlockYBits()) * imageBlocksY;
|
||||
UINT_32 rowXor = sliceXor ^ addresser.GetAddressY(y);
|
||||
|
||||
UINT_32 x = xStart;
|
||||
|
||||
// Most swizzles pack 2-4 pixels horizontally. Take advantage of this even in non-microblock-aligned
|
||||
// regions to commonly do 2-4x less work. This is still way less good than copying by whole microblocks though.
|
||||
if (ExpandX > 1)
|
||||
// Unaligned left edge
|
||||
for (; x < Min(xEnd, PowTwoAlign(xStart, ExpandX)); x++)
|
||||
{
|
||||
// Unaligned left edge
|
||||
for (; x < Min(xEnd, PowTwoAlign(xStart, ExpandX)); x++)
|
||||
{
|
||||
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
|
||||
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes);
|
||||
}
|
||||
}
|
||||
// Aligned middle
|
||||
for (; x < PowTwoAlignDown(xEnd, ExpandX); x += ExpandX)
|
||||
{
|
||||
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
|
||||
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes * ExpandX);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes * ExpandX);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Unaligned end (or the whole thing when ExpandX == 1)
|
||||
for (; x < xEnd; x++)
|
||||
{
|
||||
// Get the index of the block within the slice
|
||||
UINT_32 blk = (yBlk + (x >> addresser.GetBlockXBits()));
|
||||
// Apply that index to get the base address of the current block.
|
||||
void* pImgBlock = VoidPtrInc(pImgBlockSliceStart, blk << addresser.GetBlockBits());
|
||||
// Grab the x-xor and XOR it all together, adding to get the final address
|
||||
UINT_32 blk = (x >> addresser.GetBlockXBits());
|
||||
void* pImgBlock = VoidPtrInc(pRowImgBlockStart, blk << addresser.GetBlockBits());
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
|
|
@ -355,8 +325,478 @@ void Copy2DSliceUnaligned(
|
|||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes);
|
||||
}
|
||||
}
|
||||
// Aligned middle
|
||||
for (; x < PowTwoAlignDown(xEnd, ExpandX); x += ExpandX)
|
||||
{
|
||||
UINT_32 blk = (x >> addresser.GetBlockXBits());
|
||||
void* pImgBlock = VoidPtrInc(pRowImgBlockStart, blk << addresser.GetBlockBits());
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes * ExpandX);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes * ExpandX);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Unaligned end (or the whole thing when ExpandX == 1)
|
||||
for (; x < xEnd; x++)
|
||||
{
|
||||
// Get the index of the block within the slice
|
||||
UINT_32 blk = (x >> addresser.GetBlockXBits());
|
||||
// Apply that index to get the base address of the current block.
|
||||
void* pImgBlock = VoidPtrInc(pRowImgBlockStart, blk << addresser.GetBlockBits());
|
||||
// Grab the x-xor and XOR it all together, adding to get the final address
|
||||
void* pPix = VoidPtrInc(pImgBlock, rowXor ^ addresser.GetAddressX(x));
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, VoidPtrInc(pBuf, x * PixBytes), PixBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(VoidPtrInc(pBuf, x * PixBytes), pPix, PixBytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pBuf = VoidPtrInc(pBuf, bufStrideY);
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* CopyImgUnaligned
|
||||
*
|
||||
* @brief
|
||||
* Copies an arbitrary 3D pixel region to or from a surface.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <int BPELog2, int ExpandX, bool ImgIsDest>
|
||||
void CopyImgUnaligned(
|
||||
void* pImgBlockStart, // Block corresponding to beginning of image
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf
|
||||
size_t bufStrideZ, // Stride of each slice in pBuf
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
UINT_32 imageBlocksZ, // Depth pitch of the image slice, in blocks.
|
||||
ADDR_COORD3D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT3D extent, // Size to copy, in elements
|
||||
UINT_32 pipeBankXor, // Final value to xor in
|
||||
BOOL_32 isInMipTail, // True if this is in the mip tail.
|
||||
const LutAddresser& addresser)
|
||||
{
|
||||
constexpr UINT_32 PixBytes = (1 << BPELog2);
|
||||
|
||||
// Apply a negative x/y offset now so later code can do eg. pBuf[x] instead of pBuf[x - origin.x]
|
||||
// Keep the z offset.
|
||||
pBuf = VoidPtrDec(pBuf, origin.x * PixBytes);
|
||||
|
||||
void* pSliceBuf = pBuf;
|
||||
// Do things one slice/row at a time for unaligned regions.
|
||||
for (UINT_32 z = origin.z; z < (origin.z + extent.depth); z++)
|
||||
{
|
||||
UINT_32 sliceXor = pipeBankXor ^ addresser.GetAddressZ(z);
|
||||
UINT_32 zBlk = (z >> addresser.GetBlockZBits()) * imageBlocksZ;
|
||||
void* pRowBuf = pSliceBuf;
|
||||
for (UINT_32 y = origin.y; y < (origin.y + extent.height); y++)
|
||||
{
|
||||
UINT_32 yBlk = (y >> addresser.GetBlockYBits()) * imageBlocksY;
|
||||
UINT_32 rowXor = sliceXor ^ addresser.GetAddressY(y);
|
||||
UINT_64 rowOffset = ((zBlk + yBlk) << addresser.GetBlockBits());
|
||||
|
||||
void* pImgBlockRow = VoidPtrInc(pImgBlockStart, rowOffset);
|
||||
|
||||
CopyRowUnaligned<BPELog2, ExpandX, ImgIsDest>(
|
||||
pImgBlockRow,
|
||||
pRowBuf,
|
||||
origin.x,
|
||||
origin.x + extent.width,
|
||||
rowXor,
|
||||
addresser);
|
||||
|
||||
pRowBuf = VoidPtrInc(pRowBuf, bufStrideY);
|
||||
}
|
||||
pSliceBuf = VoidPtrInc(pSliceBuf, bufStrideZ);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* HandleUnalignedRegions
|
||||
*
|
||||
* @brief
|
||||
* Does unaligned copies for any X/Y/Z edges that are not fully aligned, fixing up the
|
||||
* copy region and pointer to point at the aligned region that remains.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <int BPELog2, int ExpandX>
|
||||
void HandleUnalignedRegions(
|
||||
void* pImgBlockStart, // Block corresponding to beginning of image
|
||||
void** ppBuf, // Pointer to pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf
|
||||
size_t bufStrideZ, // Stride of each slice in pBuf
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
UINT_32 imageBlocksZ, // Depth pitch of the image slice, in blocks.
|
||||
ADDR_COORD3D* pOrigin, // Absolute origin, in elements
|
||||
ADDR_EXTENT3D* pExtent, // Size to copy, in elements
|
||||
ADDR_EXTENT3D align, // Size to align on, in elements
|
||||
UINT_32 pipeBankXor, // Final value to xor in
|
||||
BOOL_32 isInMipTail, // True if this is in the mip tail.
|
||||
const LutAddresser& addresser)
|
||||
{
|
||||
constexpr bool ImgIsDest = true;
|
||||
|
||||
// Go through the start/end of the x/y/z extents and copy the parts that aren't aligned.
|
||||
if (pOrigin->x != PowTwoAlign(pOrigin->x, align.width))
|
||||
{
|
||||
UINT_32 xSize = Min(pOrigin->x + pExtent->width, PowTwoAlign(pOrigin->x, align.width)) - pOrigin->x;
|
||||
CopyImgUnaligned<BPELog2, ExpandX, ImgIsDest>(
|
||||
pImgBlockStart,
|
||||
*ppBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
*pOrigin,
|
||||
{ xSize, pExtent->height, pExtent->depth},
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser);
|
||||
pExtent->width -= xSize;
|
||||
pOrigin->x += xSize;
|
||||
*ppBuf = VoidPtrInc(*ppBuf, xSize << BPELog2);
|
||||
}
|
||||
if (pOrigin->y != PowTwoAlign(pOrigin->y, align.height))
|
||||
{
|
||||
UINT_32 ySize = Min(pOrigin->y + pExtent->height, PowTwoAlign(pOrigin->y, align.height)) - pOrigin->y;
|
||||
CopyImgUnaligned<BPELog2, ExpandX, ImgIsDest>(
|
||||
pImgBlockStart,
|
||||
*ppBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
*pOrigin,
|
||||
{ pExtent->width, ySize, pExtent->depth},
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser);
|
||||
pExtent->height -= ySize;
|
||||
pOrigin->y += ySize;
|
||||
*ppBuf = VoidPtrInc(*ppBuf, ySize * bufStrideY);
|
||||
}
|
||||
if (pOrigin->z != PowTwoAlign(pOrigin->z, align.depth))
|
||||
{
|
||||
UINT_32 zSize = Min(pOrigin->z + pExtent->depth, PowTwoAlign(pOrigin->z, align.depth)) - pOrigin->z;
|
||||
CopyImgUnaligned<BPELog2, ExpandX, ImgIsDest>(
|
||||
pImgBlockStart,
|
||||
*ppBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
*pOrigin,
|
||||
{ pExtent->width, pExtent->height, zSize },
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser);
|
||||
pExtent->depth -= zSize;
|
||||
pOrigin->z += zSize;
|
||||
*ppBuf = VoidPtrInc(*ppBuf, zSize * bufStrideZ);
|
||||
}
|
||||
|
||||
// At this point the starts are aligned, so we can care about just size rather than origin+size.
|
||||
if ((pExtent->width) != PowTwoAlignDown(pExtent->width, align.width))
|
||||
{
|
||||
UINT_32 xAlignedSize = PowTwoAlignDown(pOrigin->x + pExtent->width, align.width) - pOrigin->x;
|
||||
void* pBuf = VoidPtrInc(*ppBuf, xAlignedSize << BPELog2);
|
||||
CopyImgUnaligned<BPELog2, ExpandX, ImgIsDest>(
|
||||
pImgBlockStart,
|
||||
pBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
{ pOrigin->x + xAlignedSize, pOrigin->y, pOrigin->z},
|
||||
{ pExtent->width - xAlignedSize, pExtent->height, pExtent->depth },
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser);
|
||||
pExtent->width = xAlignedSize;
|
||||
}
|
||||
|
||||
if ((pExtent->height) != PowTwoAlignDown(pExtent->height, align.height))
|
||||
{
|
||||
UINT_32 yAlignedSize = PowTwoAlignDown(pOrigin->y + pExtent->height, align.height) - pOrigin->y;
|
||||
void* pBuf = VoidPtrInc(*ppBuf, yAlignedSize * bufStrideY);
|
||||
CopyImgUnaligned<BPELog2, ExpandX, ImgIsDest>(
|
||||
pImgBlockStart,
|
||||
pBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
{ pOrigin->x, pOrigin->y + yAlignedSize, pOrigin->z},
|
||||
{ pExtent->width, pExtent->height - yAlignedSize, pExtent->depth },
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser);
|
||||
pExtent->height = yAlignedSize;
|
||||
}
|
||||
|
||||
if ((pExtent->depth) != PowTwoAlignDown(pExtent->depth, align.depth))
|
||||
{
|
||||
UINT_32 zAlignedSize = PowTwoAlignDown(pOrigin->z + pExtent->depth, align.depth) - pOrigin->z;
|
||||
void* pBuf = VoidPtrInc(*ppBuf, zAlignedSize * bufStrideZ);
|
||||
CopyImgUnaligned<BPELog2, ExpandX, ImgIsDest>(
|
||||
pImgBlockStart,
|
||||
pBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
{ pOrigin->x, pOrigin->y, pOrigin->z + zAlignedSize },
|
||||
{ pExtent->width, pExtent->height, pExtent->depth - zAlignedSize },
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser);
|
||||
pExtent->depth = zAlignedSize;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* CopyMemImgHybrid
|
||||
*
|
||||
* @brief
|
||||
* Copies a 3D pixel region to a surface. Uses fast copies for fully covered microblocks.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <class MicroSw>
|
||||
AVX2_FUNC NEON_FUNC void CopyMemImgHybrid(
|
||||
void* pImgBlockStart, // Block corresponding to beginning of image
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf
|
||||
size_t bufStrideZ, // Stride of each slice in pBuf
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
UINT_32 imageBlocksZ, // Depth pitch of the image slice, in blocks.
|
||||
ADDR_COORD3D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT3D extent, // Size to copy, in elements
|
||||
UINT_32 pipeBankXor, // Final value to xor in
|
||||
BOOL_32 isInMipTail, // True if this is in the mip tail.
|
||||
const LutAddresser& addresser)
|
||||
{
|
||||
// Handle unaligned edges in x/y/z and fixup the extents to match.
|
||||
HandleUnalignedRegions<MicroSw::BpeLog2, MicroSw::ExpandX>(
|
||||
pImgBlockStart,
|
||||
&pBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
&origin,
|
||||
&extent,
|
||||
MicroSw::MicroBlockExtent,
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser
|
||||
);
|
||||
|
||||
// Apply a negative x/y offset now so later code can do eg. pBuf[x] instead of pBuf[x - origin.x]
|
||||
// Keep the z offset.
|
||||
pBuf = VoidPtrDec(pBuf, origin.x << MicroSw::BpeLog2);
|
||||
|
||||
void* pSliceBuf = pBuf;
|
||||
// Do things one slice/row at a time for unaligned regions.
|
||||
for (UINT_32 z = origin.z; z < (origin.z + extent.depth); z += MicroSw::MicroBlockExtent.depth)
|
||||
{
|
||||
UINT_32 sliceXor = pipeBankXor ^ addresser.GetAddressZ(z);
|
||||
UINT_32 zBlk = (z >> addresser.GetBlockZBits()) * imageBlocksZ;
|
||||
void* pRowBuf = pSliceBuf;
|
||||
for (UINT_32 y = origin.y; y < (origin.y + extent.height); y += MicroSw::MicroBlockExtent.height)
|
||||
{
|
||||
UINT_32 yBlk = ((y >> addresser.GetBlockYBits()) * imageBlocksY) + zBlk;
|
||||
UINT_32 rowXor = sliceXor ^ addresser.GetAddressY(y);
|
||||
|
||||
for (UINT_32 x = origin.x; x < (origin.x + extent.width); x += MicroSw::MicroBlockExtent.width)
|
||||
{
|
||||
UINT_32 xBlk = (x >> addresser.GetBlockXBits()) + yBlk;
|
||||
UINT_64 offset = (xBlk << addresser.GetBlockBits());
|
||||
offset ^= rowXor;
|
||||
offset ^= addresser.GetAddressX(x);
|
||||
|
||||
void* pPix = VoidPtrInc(pImgBlockStart, offset);
|
||||
void* pPixBuf = VoidPtrInc(pRowBuf, x << MicroSw::BpeLog2);
|
||||
|
||||
MicroSw::CopyMicroBlock(
|
||||
pPix,
|
||||
pPixBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ
|
||||
);
|
||||
}
|
||||
pRowBuf = VoidPtrInc(pRowBuf, bufStrideY * MicroSw::MicroBlockExtent.height);
|
||||
}
|
||||
pSliceBuf = VoidPtrInc(pSliceBuf, bufStrideZ * MicroSw::MicroBlockExtent.depth);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* CopyMemImgMicroblocks
|
||||
*
|
||||
* @brief
|
||||
* Copies the microblocks of a 3D pixel region to/from a surface.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <bool ImgIsDest, bool NonTemporal>
|
||||
AVX2_FUNC NEON_FUNC void CopyMemImgMicroblocks(
|
||||
void* pImgBlockStart, // Block corresponding to beginning of image
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf, ignored.
|
||||
size_t bufStrideZ, // Stride of each slice in pBuf, ignored.
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
UINT_32 imageBlocksZ, // Depth pitch of the image slice, in blocks.
|
||||
ADDR_COORD3D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT3D extent, // Size to copy, in elements
|
||||
UINT_32 pipeBankXor, // Final value to xor in
|
||||
BOOL_32 isInMipTail, // True if this is in the mip tail.
|
||||
const LutAddresser& addresser)
|
||||
{
|
||||
// Pad out our dims to microblock boundaries.
|
||||
origin.x = PowTwoAlignDown(origin.x, addresser.GetMicroBlockX());
|
||||
origin.y = PowTwoAlignDown(origin.y, addresser.GetMicroBlockY());
|
||||
origin.z = PowTwoAlignDown(origin.z, addresser.GetMicroBlockZ());
|
||||
extent.width = PowTwoAlign(extent.width, addresser.GetMicroBlockX());
|
||||
extent.height = PowTwoAlign(extent.height, addresser.GetMicroBlockY());
|
||||
extent.depth = PowTwoAlign(extent.depth, addresser.GetMicroBlockZ());
|
||||
|
||||
// Calculate the address of the first pixel in each microblock (256B), then copy it.
|
||||
for (UINT_32 z = origin.z; z < (origin.z + extent.depth); z += addresser.GetMicroBlockZ())
|
||||
{
|
||||
UINT_32 sliceXor = pipeBankXor ^ addresser.GetAddressZ(z);
|
||||
UINT_32 zBlk = (z >> addresser.GetBlockZBits()) * imageBlocksZ;
|
||||
for (UINT_32 y = origin.y; y < (origin.y + extent.height); y += addresser.GetMicroBlockY())
|
||||
{
|
||||
UINT_32 yBlk = ((y >> addresser.GetBlockYBits()) * imageBlocksY) + zBlk;
|
||||
UINT_32 rowXor = sliceXor ^ addresser.GetAddressY(y);
|
||||
|
||||
for (UINT_32 x = origin.x; x < (origin.x + extent.width); x += addresser.GetMicroBlockX())
|
||||
{
|
||||
UINT_32 xBlk = (x >> addresser.GetBlockXBits()) + yBlk;
|
||||
UINT_64 offset = (xBlk << addresser.GetBlockBits());
|
||||
offset ^= rowXor;
|
||||
offset ^= addresser.GetAddressX(x);
|
||||
|
||||
void* pPix = VoidPtrInc(pImgBlockStart, offset);
|
||||
constexpr UINT_32 CopySize = 1 << 8;
|
||||
|
||||
#if ADDR_HAS_AVX2
|
||||
if (NonTemporal && ImgIsDest)
|
||||
{
|
||||
StreamCopyToImgAligned(pPix, pBuf, CopySize);
|
||||
}
|
||||
else if (NonTemporal)
|
||||
{
|
||||
StreamCopyFromImgAligned(pBuf, pPix, CopySize);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, pBuf, CopySize);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(pBuf, pPix, CopySize);
|
||||
}
|
||||
pBuf = VoidPtrInc(pBuf, CopySize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* CopyMemImgBlocks
|
||||
*
|
||||
* @brief
|
||||
* Copies the blocks of a 3D pixel region to/from a surface.
|
||||
****************************************************************************************************
|
||||
*/
|
||||
template <bool ImgIsDest, bool NonTemporal>
|
||||
AVX2_FUNC NEON_FUNC void CopyMemImgBlocks(
|
||||
void* pImgBlockStart, // Block corresponding to beginning of image
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf, ignored.
|
||||
size_t bufStrideZ, // Stride of each slice in pBuf, ignored.
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
UINT_32 imageBlocksZ, // Depth pitch of the image slice, in blocks.
|
||||
ADDR_COORD3D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT3D extent, // Size to copy, in elements
|
||||
UINT_32 pipeBankXor, // Final value to xor in
|
||||
BOOL_32 isInMipTail, // True if this is in the mip tail.
|
||||
const LutAddresser& addresser)
|
||||
{
|
||||
if (isInMipTail)
|
||||
{
|
||||
return CopyMemImgMicroblocks<ImgIsDest, NonTemporal>(
|
||||
pImgBlockStart,
|
||||
pBuf,
|
||||
bufStrideY,
|
||||
bufStrideZ,
|
||||
imageBlocksY,
|
||||
imageBlocksZ,
|
||||
origin,
|
||||
extent,
|
||||
pipeBankXor,
|
||||
isInMipTail,
|
||||
addresser
|
||||
);
|
||||
}
|
||||
|
||||
// Pad out our dims to block boundaries.
|
||||
origin.x = PowTwoAlignDown(origin.x, addresser.GetBlockX());
|
||||
origin.y = PowTwoAlignDown(origin.y, addresser.GetBlockY());
|
||||
origin.z = PowTwoAlignDown(origin.z, addresser.GetBlockZ());
|
||||
extent.width = PowTwoAlign(extent.width, addresser.GetBlockX());
|
||||
extent.height = PowTwoAlign(extent.height, addresser.GetBlockY());
|
||||
extent.depth = PowTwoAlign(extent.depth, addresser.GetBlockZ());
|
||||
|
||||
// Copy block by block. No complex swizzling here, everything is in (strided) typewriter order.
|
||||
for (UINT_32 z = origin.z; z < (origin.z + extent.depth); z += addresser.GetBlockZ())
|
||||
{
|
||||
UINT_32 zBlk = (z >> addresser.GetBlockZBits()) * imageBlocksZ;
|
||||
for (UINT_32 y = origin.y; y < (origin.y + extent.height); y += addresser.GetBlockY())
|
||||
{
|
||||
UINT_32 yBlk = ((y >> addresser.GetBlockYBits()) * imageBlocksY) + zBlk;
|
||||
UINT_32 xBlkStart = (origin.x >> addresser.GetBlockXBits()) + yBlk;
|
||||
UINT_32 numXBlk = extent.width >> addresser.GetBlockXBits();
|
||||
UINT_64 offset = (xBlkStart << addresser.GetBlockBits());
|
||||
|
||||
void* pPix = VoidPtrInc(pImgBlockStart, offset);
|
||||
UINT_32 copySize = numXBlk << addresser.GetBlockBits();
|
||||
|
||||
#if ADDR_HAS_AVX2
|
||||
if (NonTemporal && ImgIsDest)
|
||||
{
|
||||
StreamCopyToImgAligned(pPix, pBuf, copySize);
|
||||
}
|
||||
else if (NonTemporal)
|
||||
{
|
||||
StreamCopyFromImgAligned(pBuf, pPix, copySize);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (ImgIsDest)
|
||||
{
|
||||
memcpy(pPix, pBuf, copySize);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(pBuf, pPix, copySize);
|
||||
}
|
||||
pBuf = VoidPtrInc(pBuf, copySize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -368,33 +808,130 @@ void Copy2DSliceUnaligned(
|
|||
* Determines and returns which copy function to use for copying to images
|
||||
****************************************************************************************************
|
||||
*/
|
||||
UnalignedCopyMemImgFunc LutAddresser::GetCopyMemImgFunc() const
|
||||
UnalignedCopyMemImgFunc LutAddresser::GetCopyMemImgFunc(
|
||||
ADDR_COPY_FLAGS flags
|
||||
) const
|
||||
{
|
||||
UnalignedCopyMemImgFunc pfnRet = nullptr;
|
||||
// This key encodes how the bottom 8 bits (256B) are formed, so we can match to the correct optimized
|
||||
// swizzle function (they are all swizzle-agnostic beyond those 256B).
|
||||
UINT_64 microSwKey = GetMicroSwKey(reinterpret_cast<const UINT_64*>(&m_bit[0]));
|
||||
|
||||
if (flags.blockMemcpy)
|
||||
{
|
||||
#if ADDR_HAS_AVX2
|
||||
if (CpuSupportsAvx2())
|
||||
{
|
||||
pfnRet = CopyMemImgBlocks<true, true>;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
pfnRet = CopyMemImgBlocks<true, false>;
|
||||
}
|
||||
}
|
||||
|
||||
if ((pfnRet == nullptr) && flags.hybridMemcpy)
|
||||
{
|
||||
#if ADDR_HAS_AVX2
|
||||
if (CpuSupportsAvx2())
|
||||
{
|
||||
pfnRet = CopyMemImgMicroblocks<true, true>;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
pfnRet = CopyMemImgMicroblocks<true, false>;
|
||||
}
|
||||
}
|
||||
|
||||
// If this is one of the known microswizzles and CPU support is present, use a hybrid copy that does
|
||||
// SIMD swizzling for aligned regions and falls back for unaligned edges.
|
||||
#if ADDR_HAS_AVX2
|
||||
static constexpr struct {
|
||||
UINT_64 microSwKey;
|
||||
UnalignedCopyMemImgFunc pfn;
|
||||
} AvxFuncs[] = {
|
||||
{ GetMicroSwKey(MicroSw_2D_1BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_2D_1BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_2D_2BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_2D_2BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_2D_4BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_2D_4BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_2D_8BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_2D_8BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_2D_16BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_2D_16BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_3D_1BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_3D_1BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_3D_2BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_3D_2BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_3D_4BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_3D_4BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_3D_8BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_3D_8BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_3D_16BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_3D_16BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_R_1BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_R_1BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_R_2BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_R_2BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_R_4BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_R_4BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_Z_1BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_Z_1BPE_AVX2>},
|
||||
{ GetMicroSwKey(MicroSw_D_1BPE_AVX2::MicroEq), CopyMemImgHybrid<MicroSw_D_1BPE_AVX2>}
|
||||
};
|
||||
if ((pfnRet == nullptr) && CpuSupportsAvx2())
|
||||
{
|
||||
for (const auto& func : AvxFuncs)
|
||||
{
|
||||
if (func.microSwKey == microSwKey)
|
||||
{
|
||||
pfnRet = func.pfn;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // ADDR_HAS_AVX2
|
||||
|
||||
#if ADDR_HAS_NEON
|
||||
static constexpr struct {
|
||||
UINT_64 microSwKey;
|
||||
UnalignedCopyMemImgFunc pfn;
|
||||
} NeonFuncs[] = {
|
||||
{ GetMicroSwKey(MicroSw_2D_1BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_2D_1BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_2D_2BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_2D_2BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_2D_4BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_2D_4BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_2D_8BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_2D_8BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_2D_16BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_2D_16BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_3D_1BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_3D_1BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_3D_2BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_3D_2BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_3D_4BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_3D_4BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_3D_8BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_3D_8BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_3D_16BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_3D_16BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_R_1BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_R_1BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_R_2BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_R_2BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_R_4BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_R_4BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_Z_1BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_Z_1BPE_NEON>},
|
||||
{ GetMicroSwKey(MicroSw_D_1BPE_NEON::MicroEq), CopyMemImgHybrid<MicroSw_D_1BPE_NEON>}
|
||||
};
|
||||
if ((pfnRet == nullptr) && CpuSupportsNeon())
|
||||
{
|
||||
for (const auto& func : NeonFuncs)
|
||||
{
|
||||
if (func.microSwKey == microSwKey)
|
||||
{
|
||||
pfnRet = func.pfn;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // ADDR_HAS_NEON
|
||||
|
||||
// While these are all the same function, the codegen gets really bad if the size of each pixel
|
||||
// is not known at compile time. Hence, templates.
|
||||
const UnalignedCopyMemImgFunc Funcs[MaxElementBytesLog2][3] =
|
||||
{
|
||||
// ExpandX = 1, 2, 4
|
||||
{ Copy2DSliceUnaligned<0, 1, true>, Copy2DSliceUnaligned<0, 2, true>, Copy2DSliceUnaligned<0, 4, true> }, // 1BPE
|
||||
{ Copy2DSliceUnaligned<1, 1, true>, Copy2DSliceUnaligned<1, 2, true>, Copy2DSliceUnaligned<1, 4, true> }, // 2BPE
|
||||
{ Copy2DSliceUnaligned<2, 1, true>, Copy2DSliceUnaligned<2, 2, true>, Copy2DSliceUnaligned<2, 4, true> }, // 4BPE
|
||||
{ Copy2DSliceUnaligned<3, 1, true>, Copy2DSliceUnaligned<3, 2, true>, Copy2DSliceUnaligned<3, 4, true> }, // 8BPE
|
||||
{ Copy2DSliceUnaligned<4, 1, true>, Copy2DSliceUnaligned<4, 2, true>, Copy2DSliceUnaligned<4, 4, true> }, // 16BPE
|
||||
{ CopyImgUnaligned<0, 1, true>, CopyImgUnaligned<0, 2, true>, CopyImgUnaligned<0, 4, true> }, // 1BPE
|
||||
{ CopyImgUnaligned<1, 1, true>, CopyImgUnaligned<1, 2, true>, CopyImgUnaligned<1, 4, true> }, // 2BPE
|
||||
{ CopyImgUnaligned<2, 1, true>, CopyImgUnaligned<2, 2, true>, CopyImgUnaligned<2, 4, true> }, // 4BPE
|
||||
{ CopyImgUnaligned<3, 1, true>, CopyImgUnaligned<3, 2, true>, CopyImgUnaligned<3, 4, true> }, // 8BPE
|
||||
{ CopyImgUnaligned<4, 1, true>, CopyImgUnaligned<4, 2, true>, CopyImgUnaligned<4, 4, true> }, // 16BPE
|
||||
};
|
||||
|
||||
UnalignedCopyMemImgFunc pfnRet = nullptr;
|
||||
ADDR_ASSERT(m_bpeLog2 < MaxElementBytesLog2);
|
||||
if (m_maxExpandX >= 4)
|
||||
// Fallback functions
|
||||
if (pfnRet == nullptr)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][2];
|
||||
}
|
||||
else if (m_maxExpandX >= 2)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][1];
|
||||
}
|
||||
else
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][0];
|
||||
ADDR_ASSERT(m_bpeLog2 < MaxElementBytesLog2);
|
||||
pfnRet = Funcs[m_bpeLog2][Min(2U, Log2(m_maxExpandX))];
|
||||
}
|
||||
return pfnRet;
|
||||
}
|
||||
|
|
@ -407,35 +944,139 @@ UnalignedCopyMemImgFunc LutAddresser::GetCopyMemImgFunc() const
|
|||
* Determines and returns which copy function to use for copying from images
|
||||
****************************************************************************************************
|
||||
*/
|
||||
UnalignedCopyMemImgFunc LutAddresser::GetCopyImgMemFunc() const
|
||||
UnalignedCopyMemImgFunc LutAddresser::GetCopyImgMemFunc(
|
||||
ADDR_COPY_FLAGS flags
|
||||
) const
|
||||
{
|
||||
UnalignedCopyMemImgFunc pfnRet = nullptr;
|
||||
if (flags.blockMemcpy)
|
||||
{
|
||||
#if ADDR_HAS_AVX2
|
||||
if (CpuSupportsAvx2())
|
||||
{
|
||||
pfnRet = CopyMemImgBlocks<false, true>;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
pfnRet = CopyMemImgBlocks<false, false>;
|
||||
}
|
||||
}
|
||||
|
||||
if ((pfnRet == nullptr) && flags.hybridMemcpy)
|
||||
{
|
||||
#if ADDR_HAS_AVX2
|
||||
if (CpuSupportsAvx2())
|
||||
{
|
||||
pfnRet = CopyMemImgMicroblocks<false, true>;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
pfnRet = CopyMemImgMicroblocks<false, false>;
|
||||
}
|
||||
}
|
||||
// While these are all the same function, the codegen gets really bad if the size of each pixel
|
||||
// is not known at compile time. Hence, templates.
|
||||
const UnalignedCopyMemImgFunc Funcs[MaxElementBytesLog2][3] =
|
||||
{
|
||||
// ExpandX = 1, 2, 4
|
||||
{ Copy2DSliceUnaligned<0, 1, false>, Copy2DSliceUnaligned<0, 2, false>, Copy2DSliceUnaligned<0, 4, false> }, // 1BPE
|
||||
{ Copy2DSliceUnaligned<1, 1, false>, Copy2DSliceUnaligned<1, 2, false>, Copy2DSliceUnaligned<1, 4, false> }, // 2BPE
|
||||
{ Copy2DSliceUnaligned<2, 1, false>, Copy2DSliceUnaligned<2, 2, false>, Copy2DSliceUnaligned<2, 4, false> }, // 4BPE
|
||||
{ Copy2DSliceUnaligned<3, 1, false>, Copy2DSliceUnaligned<3, 2, false>, Copy2DSliceUnaligned<3, 4, false> }, // 8BPE
|
||||
{ Copy2DSliceUnaligned<4, 1, false>, Copy2DSliceUnaligned<4, 2, false>, Copy2DSliceUnaligned<4, 4, false> }, // 16BPE
|
||||
{ CopyImgUnaligned<0, 1, false>, CopyImgUnaligned<0, 2, false>, CopyImgUnaligned<0, 4, false> }, // 1BPE
|
||||
{ CopyImgUnaligned<1, 1, false>, CopyImgUnaligned<1, 2, false>, CopyImgUnaligned<1, 4, false> }, // 2BPE
|
||||
{ CopyImgUnaligned<2, 1, false>, CopyImgUnaligned<2, 2, false>, CopyImgUnaligned<2, 4, false> }, // 4BPE
|
||||
{ CopyImgUnaligned<3, 1, false>, CopyImgUnaligned<3, 2, false>, CopyImgUnaligned<3, 4, false> }, // 8BPE
|
||||
{ CopyImgUnaligned<4, 1, false>, CopyImgUnaligned<4, 2, false>, CopyImgUnaligned<4, 4, false> }, // 16BPE
|
||||
};
|
||||
|
||||
UnalignedCopyMemImgFunc pfnRet = nullptr;
|
||||
ADDR_ASSERT(m_bpeLog2 < MaxElementBytesLog2);
|
||||
if (m_maxExpandX >= 4)
|
||||
if (pfnRet == nullptr)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][2];
|
||||
}
|
||||
else if (m_maxExpandX >= 2)
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][1];
|
||||
}
|
||||
else
|
||||
{
|
||||
pfnRet = Funcs[m_bpeLog2][0];
|
||||
pfnRet = Funcs[m_bpeLog2][Min(2U, Log2(m_maxExpandX))];
|
||||
}
|
||||
return pfnRet;
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::DoCopyImgMemPreFlushes
|
||||
*
|
||||
* @brief
|
||||
* Issues the fence required before nontemporal SIMD loads read image memory (image-to-memory copy).
|
||||
*
|
||||
* @note
|
||||
* The fence is only issued when ADDR_HAS_AVX2 is enabled at compile time, CpuSupportsAvx2() reports
|
||||
* support, and either flags.blockMemcpy or flags.hybridMemcpy is set. Otherwise this does nothing.
|
||||
*
|
||||
* @return
|
||||
* N/A
|
||||
****************************************************************************************************
|
||||
*/
|
||||
void LutAddresser::DoCopyImgMemPreFlushes(
|
||||
ADDR_COPY_FLAGS flags
|
||||
) const
|
||||
{
|
||||
#if ADDR_HAS_AVX2
|
||||
if ((flags.blockMemcpy || flags.hybridMemcpy) && CpuSupportsAvx2())
|
||||
{
|
||||
// Loads are weakly ordered, and we need to ensure they start after the previous copy
|
||||
NonTemporalLoadStoreFence();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
****************************************************************************************************
|
||||
* LutAddresser::DoCopyMemImgPostFlushes
|
||||
*
|
||||
* @brief
|
||||
* Issues the fence required after nontemporal SIMD stores have written image memory
|
||||
* (memory-to-image copy), so the stores complete before the next submission or copy.
|
||||
*
|
||||
* @note
|
||||
* The fence is issued whenever ADDR_HAS_AVX2 is enabled at compile time and CpuSupportsAvx2()
|
||||
* reports support; the flags argument is not read by this function.
|
||||
* NOTE(review): DoCopyImgMemPreFlushes additionally checks blockMemcpy/hybridMemcpy. Presumably the
|
||||
* unconditional fence here is intentional; confirm against how the AVX2 copy functions get selected.
|
||||
*
|
||||
* @return
|
||||
* N/A
|
||||
****************************************************************************************************
|
||||
*/
|
||||
void LutAddresser::DoCopyMemImgPostFlushes(
|
||||
ADDR_COPY_FLAGS flags
|
||||
) const
|
||||
{
|
||||
#if ADDR_HAS_AVX2
|
||||
if (CpuSupportsAvx2())
|
||||
{
|
||||
// Stores are weakly ordered, and we need to ensure they finish before the next submission
|
||||
// or copy.
|
||||
NonTemporalStoreFence();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#if __cplusplus < 201703L
|
||||
// Static constexpr data members that are odr-used need an additional definition at namespace scope until C++17
|
||||
#if ADDR_HAS_AVX2
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_1BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_2BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_4BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_8BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_16BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_1BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_2BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_4BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_8BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_16BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_R_1BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_R_2BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_R_4BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_Z_1BPE_AVX2::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_D_1BPE_AVX2::MicroBlockExtent;
|
||||
#endif
|
||||
#if ADDR_HAS_NEON
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_1BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_2BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_4BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_8BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_2D_16BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_1BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_2BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_4BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_8BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_3D_16BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_R_1BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_R_2BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_R_4BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_Z_1BPE_NEON::MicroBlockExtent;
|
||||
constexpr ADDR_EXTENT3D MicroSw_D_1BPE_NEON::MicroBlockExtent;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2024-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
/**
|
||||
|
|
@ -26,10 +27,13 @@ typedef void (*UnalignedCopyMemImgFunc)(
|
|||
void* pImgBlockSliceStart, // Block corresponding to beginning of slice
|
||||
void* pBuf, // Pointer to data starting from the copy origin.
|
||||
size_t bufStrideY, // Stride of each row in pBuf
|
||||
size_t bufStrideZ, // Stride of each slice in pBuf
|
||||
UINT_32 imageBlocksY, // Width of the image slice, in blocks.
|
||||
ADDR_COORD2D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT2D extent, // Size to copy, in elements
|
||||
UINT_32 sliceXor, // Includes pipeBankXor and z XOR
|
||||
UINT_32 imageBlocksZ, // Depth pitch of the image slice, in blocks.
|
||||
ADDR_COORD3D origin, // Absolute origin, in elements
|
||||
ADDR_EXTENT3D extent, // Size to copy, in elements
|
||||
UINT_32 pipeBankXor, // Final value to XOR into the address
|
||||
BOOL_32 isInMipTail, // True if this is in the mip tail.
|
||||
const LutAddresser& addresser);
|
||||
|
||||
// This class calculates and holds up to four lookup tables (x/y/z/s) which can be used to cheaply calculate the
|
||||
|
|
@ -60,10 +64,21 @@ public:
|
|||
|
||||
// Get the block size
|
||||
UINT_32 GetBlockBits() const { return m_blockBits; }
|
||||
UINT_32 GetBlockX() const { return m_blockSize.width; }
|
||||
UINT_32 GetBlockY() const { return m_blockSize.height; }
|
||||
UINT_32 GetBlockZ() const { return m_blockSize.depth; }
|
||||
UINT_32 GetBlockXBits() const { return Log2(m_blockSize.width); }
|
||||
UINT_32 GetBlockYBits() const { return Log2(m_blockSize.height); }
|
||||
UINT_32 GetBlockZBits() const { return Log2(m_blockSize.depth); }
|
||||
|
||||
// Get the microblock size
|
||||
UINT_32 GetMicroBlockX() const { return m_microBlockSize.width; }
|
||||
UINT_32 GetMicroBlockY() const { return m_microBlockSize.height; }
|
||||
UINT_32 GetMicroBlockZ() const { return m_microBlockSize.depth; }
|
||||
|
||||
// Get other image props
|
||||
UINT_32 GetBpeLog2() const { return m_bpeLog2; }
|
||||
|
||||
// "Fast single channel" functions to get the part that each channel contributes to be XORd together.
|
||||
UINT_32 GetAddressX(UINT_32 x) const { return m_pXLut[x & m_xLutMask];}
|
||||
UINT_32 GetAddressY(UINT_32 y) const { return m_pYLut[y & m_yLutMask];}
|
||||
|
|
@ -71,8 +86,11 @@ public:
|
|||
UINT_32 GetAddressS(UINT_32 s) const { return m_pSLut[s & m_sLutMask];}
|
||||
|
||||
// Get a function that can copy a single 2D slice of an image with this swizzle.
|
||||
UnalignedCopyMemImgFunc GetCopyMemImgFunc() const;
|
||||
UnalignedCopyMemImgFunc GetCopyImgMemFunc() const;
|
||||
UnalignedCopyMemImgFunc GetCopyMemImgFunc(ADDR_COPY_FLAGS flags) const;
|
||||
UnalignedCopyMemImgFunc GetCopyImgMemFunc(ADDR_COPY_FLAGS flags) const;
|
||||
|
||||
void DoCopyMemImgPostFlushes(ADDR_COPY_FLAGS flags) const;
|
||||
void DoCopyImgMemPreFlushes(ADDR_COPY_FLAGS flags) const;
|
||||
private:
|
||||
// Calculate general properties of the swizzle equations
|
||||
void InitSwizzleProps();
|
||||
|
|
@ -99,6 +117,9 @@ private:
|
|||
|
||||
// The block size
|
||||
ADDR_EXTENT3D m_blockSize;
|
||||
|
||||
// The microblock size
|
||||
ADDR_EXTENT3D m_microBlockSize;
|
||||
|
||||
// Number of 'x' bits at the bottom of the equation. Must be a pow2 and at least 1.
|
||||
// This will be used as a simple optimization to batch together operations on adjacent x pixels.
|
||||
|
|
|
|||
2031
src/amd/addrlib/src/core/addrswizzlersimd.h
Normal file
2031
src/amd/addrlib/src/core/addrswizzlersimd.h
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -2782,9 +2782,11 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
|
|||
|
||||
if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
|
||||
{
|
||||
UINT_32 ratioLow;
|
||||
UINT_32 ratioHi;
|
||||
GetSwizzleModePreferenceRatio(pIn, &ratioLow, &ratioHi);
|
||||
|
||||
const UINT_8 maxFmaskSwizzleModeType = 2;
|
||||
const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
|
||||
const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
|
||||
const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
|
||||
const UINT_32 numSlices = Max(pIn->numSlices, 1u);
|
||||
const UINT_32 width = Max(pIn->width, 1u);
|
||||
|
|
@ -3097,8 +3099,10 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
|
|||
// Tracks the size of each valid swizzle mode's surface in bytes
|
||||
UINT_64 padSize[AddrBlockMaxTiledType] = {};
|
||||
|
||||
const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
|
||||
const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
|
||||
UINT_32 ratioLow;
|
||||
UINT_32 ratioHi;
|
||||
GetSwizzleModePreferenceRatio(pIn, &ratioLow, &ratioHi);
|
||||
|
||||
const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
|
||||
UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use
|
||||
UINT_64 minSize = 0; // Tracks the minimum acceptable block type
|
||||
|
|
@ -4111,7 +4115,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
|
|||
* Gfx10Lib::HwlCopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
|
|
@ -4177,7 +4181,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlCopyMemToSurface(
|
|||
|
||||
LutAddresser addresser = LutAddresser();
|
||||
addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc(pIn->copyFlags);
|
||||
if (pfnCopyUnaligned == nullptr)
|
||||
{
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
|
|
@ -4192,35 +4196,27 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlCopyMemToSurface(
|
|||
const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
|
||||
UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
|
||||
UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
|
||||
UINT_32 zBlks = localOut.sliceSize >> (addresser.GetBlockBits() - addresser.GetBlockZBits());
|
||||
|
||||
UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
|
||||
UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
|
||||
UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
|
||||
ADDR_COORD3D rawOrigin = {
|
||||
pCurRegion->x + pMipInfo->mipTailCoordX,
|
||||
pCurRegion->y + pMipInfo->mipTailCoordY,
|
||||
pCurRegion->slice + pMipInfo->mipTailCoordZ
|
||||
};
|
||||
|
||||
for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
|
||||
{
|
||||
// The copy functions take the base address of the hardware slice, not the logical slice. Those are
|
||||
// not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
|
||||
// for unaligned copies.
|
||||
UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
|
||||
UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
|
||||
|
||||
UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
|
||||
UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
|
||||
|
||||
ADDR_COORD2D sliceOrigin = { xStart, yStart };
|
||||
ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
|
||||
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
pCurRegion->memRowPitch,
|
||||
yBlks,
|
||||
sliceOrigin,
|
||||
sliceExtent,
|
||||
sliceXor,
|
||||
addresser);
|
||||
}
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, mipOffset),
|
||||
pCurRegion->pMem,
|
||||
pCurRegion->memRowPitch,
|
||||
pCurRegion->memSlicePitch,
|
||||
yBlks,
|
||||
zBlks,
|
||||
rawOrigin,
|
||||
pCurRegion->copyDims,
|
||||
pIn->pbXor,
|
||||
(pCurRegion->mipId >= localOut.firstMipIdInTail),
|
||||
addresser);
|
||||
}
|
||||
addresser.DoCopyMemImgPostFlushes(pIn->copyFlags);
|
||||
}
|
||||
return returnCode;
|
||||
}
|
||||
|
|
@ -4230,7 +4226,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlCopyMemToSurface(
|
|||
* Gfx10Lib::HwlCopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
|
|
@ -4296,7 +4292,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlCopySurfaceToMem(
|
|||
|
||||
LutAddresser addresser = LutAddresser();
|
||||
addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc(pIn->copyFlags);
|
||||
if (pfnCopyUnaligned == nullptr)
|
||||
{
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
|
|
@ -4305,40 +4301,32 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlCopySurfaceToMem(
|
|||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
addresser.DoCopyImgMemPreFlushes(pIn->copyFlags);
|
||||
for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
|
||||
{
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
|
||||
const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
|
||||
UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
|
||||
UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
|
||||
UINT_32 zBlks = localOut.sliceSize >> (addresser.GetBlockBits() - addresser.GetBlockZBits());
|
||||
|
||||
UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
|
||||
UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
|
||||
UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
|
||||
ADDR_COORD3D rawOrigin = {
|
||||
pCurRegion->x + pMipInfo->mipTailCoordX,
|
||||
pCurRegion->y + pMipInfo->mipTailCoordY,
|
||||
pCurRegion->slice + pMipInfo->mipTailCoordZ
|
||||
};
|
||||
|
||||
for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
|
||||
{
|
||||
// The copy functions take the base address of the hardware slice, not the logical slice. Those are
|
||||
// not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
|
||||
// for unaligned copies.
|
||||
UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
|
||||
UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
|
||||
|
||||
UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
|
||||
UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
|
||||
|
||||
ADDR_COORD2D sliceOrigin = { xStart, yStart };
|
||||
ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
|
||||
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
pCurRegion->memRowPitch,
|
||||
yBlks,
|
||||
sliceOrigin,
|
||||
sliceExtent,
|
||||
sliceXor,
|
||||
addresser);
|
||||
}
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, mipOffset),
|
||||
pCurRegion->pMem,
|
||||
pCurRegion->memRowPitch,
|
||||
pCurRegion->memSlicePitch,
|
||||
yBlks,
|
||||
zBlks,
|
||||
rawOrigin,
|
||||
pCurRegion->copyDims,
|
||||
pIn->pbXor,
|
||||
(pCurRegion->mipId >= localOut.firstMipIdInTail),
|
||||
addresser);
|
||||
}
|
||||
}
|
||||
return returnCode;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -758,6 +758,14 @@ ChipFamily Gfx11Lib::HwlConvertChipFamily(
|
|||
case FAMILY_PHX:
|
||||
m_settings.isPhoenix = 1;
|
||||
break;
|
||||
case FAMILY_GFX1170:
|
||||
{
|
||||
if (ASICREV_IS_GFX1170(chipRevision))
|
||||
{
|
||||
m_settings.isGfx1170 = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ADDR_ASSERT(!"Unknown chip family");
|
||||
break;
|
||||
|
|
@ -2651,10 +2659,13 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
|
|||
|
||||
UINT_64 padSize[AddrBlockMaxTiledType] = {};
|
||||
|
||||
const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
|
||||
const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
|
||||
UINT_32 ratioLow;
|
||||
UINT_32 ratioHi;
|
||||
GetSwizzleModePreferenceRatio(pIn, &ratioLow, &ratioHi);
|
||||
|
||||
const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
|
||||
UINT_32 minSizeBlk = AddrBlockMicro;
|
||||
UINT_32 selectedBlk = AddrBlockMaxTiledType;
|
||||
UINT_64 minSize = 0;
|
||||
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
|
||||
|
|
@ -2678,11 +2689,66 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
|
|||
{
|
||||
padSize[i] = localOut.surfSize;
|
||||
|
||||
if ((minSize == 0) ||
|
||||
Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
|
||||
if (pIn->useBlockBasedHeuristic)
|
||||
{
|
||||
minSize = padSize[i];
|
||||
minSizeBlk = i;
|
||||
const UINT_32 blockCountX = localOut.pitch / localOut.blockWidth;
|
||||
const UINT_32 blockCountY = localOut.height / localOut.blockHeight;
|
||||
const UINT_32 blockCountZ = localOut.numSlices / localOut.blockSlices;
|
||||
|
||||
UINT_32 requiredBlockCountX = 1;
|
||||
UINT_32 requiredBlockCountY = 1;
|
||||
UINT_32 requiredBlockCountZ = 1;
|
||||
|
||||
switch (pIn->resourceType)
|
||||
{
|
||||
case ADDR_RSRC_TEX_1D:
|
||||
requiredBlockCountX = 2;
|
||||
break;
|
||||
case ADDR_RSRC_TEX_2D:
|
||||
requiredBlockCountX = 2;
|
||||
requiredBlockCountY = 2;
|
||||
break;
|
||||
case ADDR_RSRC_TEX_3D:
|
||||
requiredBlockCountX = 2;
|
||||
requiredBlockCountY = 2;
|
||||
if (IsThick(pIn->resourceType, localIn.swizzleMode))
|
||||
{
|
||||
requiredBlockCountZ = 2;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
}
|
||||
|
||||
// If the block count is sufficient, select this block type. Otherwise, track the block type with minimum size to
|
||||
// fall back to it, in case no block type can satisfy the block count requirement.
|
||||
if ((blockCountX >= requiredBlockCountX) &&
|
||||
(blockCountY >= requiredBlockCountY) &&
|
||||
(blockCountZ >= requiredBlockCountZ) &&
|
||||
(localIn.swizzleMode != ADDR_SW_LINEAR))
|
||||
{
|
||||
selectedBlk = i;
|
||||
}
|
||||
else
|
||||
{
|
||||
const bool has3DThick = (allowedSwModeSet.value & Gfx11Rsrc3dThickSwModeMask) != 0;
|
||||
const bool is3DThin = (pOut->resourceType == ADDR_RSRC_TEX_3D) &&
|
||||
IsThin(pOut->resourceType, swMode[i]);
|
||||
if (((has3DThick && is3DThin) == FALSE) && (minSize == 0 || (padSize[i] < minSize)))
|
||||
{
|
||||
minSize = padSize[i];
|
||||
minSizeBlk = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((minSize == 0) ||
|
||||
Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
|
||||
{
|
||||
minSize = padSize[i];
|
||||
minSizeBlk = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -2693,63 +2759,77 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
|
|||
}
|
||||
}
|
||||
|
||||
if (pIn->memoryBudget > 1.0)
|
||||
if (pIn->useBlockBasedHeuristic)
|
||||
{
|
||||
// If minimum size is given by swizzle mode with bigger-block type, then don't ever check
|
||||
// smaller-block type again in coming loop
|
||||
switch (minSizeBlk)
|
||||
// If no block type satisfied the block-based heuristic, fall back to the tracked minimum-size block type.
|
||||
if (selectedBlk == AddrBlockMaxTiledType)
|
||||
{
|
||||
case AddrBlockThick256KB:
|
||||
allowedBlockSet.gfx11.thin256KB = 0;
|
||||
case AddrBlockThin256KB:
|
||||
allowedBlockSet.macroThick64KB = 0;
|
||||
case AddrBlockThick64KB:
|
||||
allowedBlockSet.macroThin64KB = 0;
|
||||
case AddrBlockThin64KB:
|
||||
allowedBlockSet.macroThick4KB = 0;
|
||||
case AddrBlockThick4KB:
|
||||
allowedBlockSet.macroThin4KB = 0;
|
||||
case AddrBlockThin4KB:
|
||||
allowedBlockSet.micro = 0;
|
||||
case AddrBlockMicro:
|
||||
allowedBlockSet.linear = 0;
|
||||
case AddrBlockLinear:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
break;
|
||||
selectedBlk = minSizeBlk;
|
||||
}
|
||||
|
||||
for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pIn->memoryBudget > 1.0)
|
||||
{
|
||||
if ((i != minSizeBlk) &&
|
||||
Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
|
||||
// If minimum size is given by swizzle mode with bigger-block type, then don't ever check
|
||||
// smaller-block type again in coming loop
|
||||
switch (minSizeBlk)
|
||||
{
|
||||
if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
|
||||
case AddrBlockThick256KB:
|
||||
allowedBlockSet.gfx11.thin256KB = 0;
|
||||
case AddrBlockThin256KB:
|
||||
allowedBlockSet.macroThick64KB = 0;
|
||||
case AddrBlockThick64KB:
|
||||
allowedBlockSet.macroThin64KB = 0;
|
||||
case AddrBlockThin64KB:
|
||||
allowedBlockSet.macroThick4KB = 0;
|
||||
case AddrBlockThick4KB:
|
||||
allowedBlockSet.macroThin4KB = 0;
|
||||
case AddrBlockThin4KB:
|
||||
allowedBlockSet.micro = 0;
|
||||
case AddrBlockMicro:
|
||||
allowedBlockSet.linear = 0;
|
||||
case AddrBlockLinear:
|
||||
break;
|
||||
|
||||
default:
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
break;
|
||||
}
|
||||
|
||||
for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
|
||||
{
|
||||
if ((i != minSizeBlk) &&
|
||||
Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<AddrBlockType>(i)))
|
||||
{
|
||||
// Clear the block type if the memory waste is unacceptable
|
||||
allowedBlockSet.value &= ~(1u << (i - 1));
|
||||
if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
|
||||
{
|
||||
// Clear the block type if the memory waste is unacceptable
|
||||
allowedBlockSet.value &= ~(1u << (i - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove linear block type if 2 or more block types are allowed
|
||||
if (IsPow2(allowedBlockSet.value) == FALSE)
|
||||
{
|
||||
allowedBlockSet.linear = 0;
|
||||
}
|
||||
|
||||
// Select the biggest allowed block type
|
||||
minSizeBlk = Log2(allowedBlockSet.value) + 1;
|
||||
|
||||
if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
|
||||
{
|
||||
minSizeBlk = AddrBlockLinear;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove linear block type if 2 or more block types are allowed
|
||||
if (IsPow2(allowedBlockSet.value) == FALSE)
|
||||
{
|
||||
allowedBlockSet.linear = 0;
|
||||
}
|
||||
|
||||
// Select the biggest allowed block type
|
||||
minSizeBlk = Log2(allowedBlockSet.value) + 1;
|
||||
|
||||
if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
|
||||
{
|
||||
minSizeBlk = AddrBlockLinear;
|
||||
}
|
||||
selectedBlk = minSizeBlk;
|
||||
}
|
||||
|
||||
switch (minSizeBlk)
|
||||
|
||||
switch (selectedBlk)
|
||||
{
|
||||
case AddrBlockLinear:
|
||||
allowedSwModeSet.value &= Gfx11LinearSwModeMask;
|
||||
|
|
@ -3685,7 +3765,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
|
|||
* Gfx11Lib::HwlCopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
|
|
@ -3751,7 +3831,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlCopyMemToSurface(
|
|||
|
||||
LutAddresser addresser = LutAddresser();
|
||||
addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc(pIn->copyFlags);
|
||||
if (pfnCopyUnaligned == nullptr)
|
||||
{
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
|
|
@ -3766,35 +3846,27 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlCopyMemToSurface(
|
|||
const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
|
||||
UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
|
||||
UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
|
||||
UINT_32 zBlks = localOut.sliceSize >> (addresser.GetBlockBits() - addresser.GetBlockZBits());
|
||||
|
||||
UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
|
||||
UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
|
||||
UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
|
||||
ADDR_COORD3D rawOrigin = {
|
||||
pCurRegion->x + pMipInfo->mipTailCoordX,
|
||||
pCurRegion->y + pMipInfo->mipTailCoordY,
|
||||
pCurRegion->slice + pMipInfo->mipTailCoordZ
|
||||
};
|
||||
|
||||
for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
|
||||
{
|
||||
// The copy functions take the base address of the hardware slice, not the logical slice. Those are
|
||||
// not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
|
||||
// for unaligned copies.
|
||||
UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
|
||||
UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
|
||||
|
||||
UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
|
||||
UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
|
||||
|
||||
ADDR_COORD2D sliceOrigin = { xStart, yStart };
|
||||
ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
|
||||
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
pCurRegion->memRowPitch,
|
||||
yBlks,
|
||||
sliceOrigin,
|
||||
sliceExtent,
|
||||
sliceXor,
|
||||
addresser);
|
||||
}
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, mipOffset),
|
||||
pCurRegion->pMem,
|
||||
pCurRegion->memRowPitch,
|
||||
pCurRegion->memSlicePitch,
|
||||
yBlks,
|
||||
zBlks,
|
||||
rawOrigin,
|
||||
pCurRegion->copyDims,
|
||||
pIn->pbXor,
|
||||
(pCurRegion->mipId >= localOut.firstMipIdInTail),
|
||||
addresser);
|
||||
}
|
||||
addresser.DoCopyMemImgPostFlushes(pIn->copyFlags);
|
||||
}
|
||||
return returnCode;
|
||||
}
|
||||
|
|
@ -3804,7 +3876,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlCopyMemToSurface(
|
|||
* Gfx11Lib::HwlCopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
|
|
@ -3870,7 +3942,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlCopySurfaceToMem(
|
|||
|
||||
LutAddresser addresser = LutAddresser();
|
||||
addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc(pIn->copyFlags);
|
||||
if (pfnCopyUnaligned == nullptr)
|
||||
{
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
|
|
@ -3879,40 +3951,32 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlCopySurfaceToMem(
|
|||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
addresser.DoCopyImgMemPreFlushes(pIn->copyFlags);
|
||||
for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
|
||||
{
|
||||
const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
|
||||
const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
|
||||
UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
|
||||
UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
|
||||
UINT_32 zBlks = localOut.sliceSize >> (addresser.GetBlockBits() - addresser.GetBlockZBits());
|
||||
|
||||
UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
|
||||
UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
|
||||
UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
|
||||
ADDR_COORD3D rawOrigin = {
|
||||
pCurRegion->x + pMipInfo->mipTailCoordX,
|
||||
pCurRegion->y + pMipInfo->mipTailCoordY,
|
||||
pCurRegion->slice + pMipInfo->mipTailCoordZ
|
||||
};
|
||||
|
||||
for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
|
||||
{
|
||||
// The copy functions take the base address of the hardware slice, not the logical slice. Those are
|
||||
// not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
|
||||
// for unaligned copies.
|
||||
UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
|
||||
UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
|
||||
|
||||
UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
|
||||
UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
|
||||
|
||||
ADDR_COORD2D sliceOrigin = { xStart, yStart };
|
||||
ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
|
||||
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
pCurRegion->memRowPitch,
|
||||
yBlks,
|
||||
sliceOrigin,
|
||||
sliceExtent,
|
||||
sliceXor,
|
||||
addresser);
|
||||
}
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, mipOffset),
|
||||
pCurRegion->pMem,
|
||||
pCurRegion->memRowPitch,
|
||||
pCurRegion->memSlicePitch,
|
||||
yBlks,
|
||||
zBlks,
|
||||
rawOrigin,
|
||||
pCurRegion->copyDims,
|
||||
pIn->pbXor,
|
||||
(pCurRegion->mipId >= localOut.firstMipIdInTail),
|
||||
addresser);
|
||||
}
|
||||
}
|
||||
return returnCode;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -36,7 +36,8 @@ struct Gfx11ChipSettings
|
|||
{
|
||||
UINT_32 isStrix : 1;
|
||||
UINT_32 isPhoenix : 1;
|
||||
UINT_32 reserved1 : 30;
|
||||
UINT_32 isGfx1170 : 1;
|
||||
UINT_32 reserved1 : 29;
|
||||
|
||||
// Misc configuration bits
|
||||
UINT_32 reserved2 : 32;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2022-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -71,8 +71,7 @@ const SwizzleModeFlags Gfx12Lib::SwizzleModeTable[ADDR3_MAX_TYPE] =
|
|||
Gfx12Lib::Gfx12Lib(
|
||||
const Client* pClient)
|
||||
:
|
||||
Lib(pClient),
|
||||
m_numSwizzleBits(0)
|
||||
Lib(pClient)
|
||||
{
|
||||
memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
|
||||
}
|
||||
|
|
@ -878,7 +877,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
|
|||
* Gfx12Lib::HwlCopyMemToSurface
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
* Copy multiple regions from memory to a non-linear surface.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
|
|
@ -925,7 +924,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
|
|||
}
|
||||
|
||||
LutAddresser addresser = LutAddresser();
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
|
||||
|
|
@ -936,7 +935,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
|
|||
ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
|
||||
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
|
||||
addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
|
||||
pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
|
||||
pfnCopyUnaligned = addresser.GetCopyMemImgFunc(pIn->copyFlags);
|
||||
if (pfnCopyUnaligned == nullptr)
|
||||
{
|
||||
ADDR_ASSERT_ALWAYS(); // What format is this?
|
||||
|
|
@ -952,35 +951,27 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
|
|||
const ADDR3_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
|
||||
UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
|
||||
UINT_32 yBlks = pMipInfo->pitch / localOut.blockExtent.width;
|
||||
UINT_32 zBlks = localOut.sliceSize >> (addresser.GetBlockBits() - addresser.GetBlockZBits());
|
||||
|
||||
UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
|
||||
UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
|
||||
UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
|
||||
ADDR_COORD3D rawOrigin = {
|
||||
pCurRegion->x + pMipInfo->mipTailCoordX,
|
||||
pCurRegion->y + pMipInfo->mipTailCoordY,
|
||||
pCurRegion->slice + pMipInfo->mipTailCoordZ
|
||||
};
|
||||
|
||||
for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
|
||||
{
|
||||
// The copy functions take the base address of the hardware slice, not the logical slice. Those are
|
||||
// not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
|
||||
// for unaligned copies.
|
||||
UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
|
||||
UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
|
||||
|
||||
UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
|
||||
UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
|
||||
|
||||
ADDR_COORD2D sliceOrigin = { xStart, yStart };
|
||||
ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
|
||||
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
pCurRegion->memRowPitch,
|
||||
yBlks,
|
||||
sliceOrigin,
|
||||
sliceExtent,
|
||||
sliceXor,
|
||||
addresser);
|
||||
}
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, mipOffset),
|
||||
pCurRegion->pMem,
|
||||
pCurRegion->memRowPitch,
|
||||
pCurRegion->memSlicePitch,
|
||||
yBlks,
|
||||
zBlks,
|
||||
rawOrigin,
|
||||
pCurRegion->copyDims,
|
||||
pIn->pbXor,
|
||||
(pCurRegion->mipId >= localOut.firstMipIdInTail),
|
||||
addresser);
|
||||
}
|
||||
addresser.DoCopyMemImgPostFlushes(pIn->copyFlags);
|
||||
}
|
||||
return returnCode;
|
||||
}
|
||||
|
|
@ -990,7 +981,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
|
|||
* Gfx12Lib::HwlCopySurfaceToMem
|
||||
*
|
||||
* @brief
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
* Copy multiple regions from a non-linear surface to memory.
|
||||
*
|
||||
* @return
|
||||
* Error or success.
|
||||
|
|
@ -1037,7 +1028,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlCopySurfaceToMem(
|
|||
}
|
||||
|
||||
LutAddresser addresser = LutAddresser();
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
|
||||
UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
|
||||
|
|
@ -1048,7 +1039,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlCopySurfaceToMem(
|
|||
ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
|
||||
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
|
||||
addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
|
||||
pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
|
||||
pfnCopyUnaligned = addresser.GetCopyImgMemFunc(pIn->copyFlags);
|
||||
if (pfnCopyUnaligned == nullptr)
|
||||
{
|
||||
ADDR_ASSERT_ALWAYS(); // What format is this?
|
||||
|
|
@ -1058,78 +1049,37 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlCopySurfaceToMem(
|
|||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
addresser.DoCopyImgMemPreFlushes(pIn->copyFlags);
|
||||
for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
|
||||
{
|
||||
const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
|
||||
const ADDR3_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
|
||||
UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
|
||||
UINT_32 yBlks = pMipInfo->pitch / localOut.blockExtent.width;
|
||||
UINT_32 zBlks = localOut.sliceSize >> (addresser.GetBlockBits() - addresser.GetBlockZBits());
|
||||
|
||||
UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
|
||||
UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
|
||||
UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
|
||||
ADDR_COORD3D rawOrigin = {
|
||||
pCurRegion->x + pMipInfo->mipTailCoordX,
|
||||
pCurRegion->y + pMipInfo->mipTailCoordY,
|
||||
pCurRegion->slice + pMipInfo->mipTailCoordZ
|
||||
};
|
||||
|
||||
for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
|
||||
{
|
||||
// The copy functions take the base address of the hardware slice, not the logical slice. Those are
|
||||
// not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
|
||||
// for unaligned copies.
|
||||
UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
|
||||
UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
|
||||
|
||||
UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
|
||||
UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
|
||||
|
||||
ADDR_COORD2D sliceOrigin = { xStart, yStart };
|
||||
ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
|
||||
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
|
||||
VoidPtrInc(pCurRegion->pMem, memOffset),
|
||||
pCurRegion->memRowPitch,
|
||||
yBlks,
|
||||
sliceOrigin,
|
||||
sliceExtent,
|
||||
sliceXor,
|
||||
addresser);
|
||||
}
|
||||
pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, mipOffset),
|
||||
pCurRegion->pMem,
|
||||
pCurRegion->memRowPitch,
|
||||
pCurRegion->memSlicePitch,
|
||||
yBlks,
|
||||
zBlks,
|
||||
rawOrigin,
|
||||
pCurRegion->copyDims,
|
||||
pIn->pbXor,
|
||||
(pCurRegion->mipId >= localOut.firstMipIdInTail),
|
||||
addresser);
|
||||
}
|
||||
}
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx12Lib::HwlComputePipeBankXor
|
||||
*
|
||||
* @brief
|
||||
* Generate a PipeBankXor value to be ORed into bits above numSwizzleBits of address
|
||||
*
|
||||
* @return
|
||||
* PipeBankXor value
|
||||
************************************************************************************************************************
|
||||
*/
|
||||
ADDR_E_RETURNCODE Gfx12Lib::HwlComputePipeBankXor(
|
||||
const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
|
||||
ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
|
||||
) const
|
||||
{
|
||||
if ((m_numSwizzleBits != 0) && // does this configuration support swizzling
|
||||
// base address XOR in GFX12 will be applied to all blk_size = 4KB, 64KB, or 256KB swizzle modes,
|
||||
// Note that Linear and 256B are excluded.
|
||||
(IsLinear(pIn->swizzleMode) == FALSE) &&
|
||||
(IsBlock256b(pIn->swizzleMode) == FALSE))
|
||||
{
|
||||
pOut->pipeBankXor = pIn->surfIndex % (1 << m_numSwizzleBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
pOut->pipeBankXor = 0;
|
||||
}
|
||||
|
||||
return ADDR_OK;
|
||||
}
|
||||
|
||||
/**
|
||||
************************************************************************************************************************
|
||||
* Gfx12Lib::GetSwizzlePatternInfo
|
||||
|
|
@ -1263,72 +1213,13 @@ const ADDR_SW_PATINFO* Gfx12Lib::GetSwizzlePatternInfo(
|
|||
BOOL_32 Gfx12Lib::HwlInitGlobalParams(
|
||||
const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
|
||||
{
|
||||
BOOL_32 valid = TRUE;
|
||||
GB_ADDR_CONFIG_GFX12 gbAddrConfig;
|
||||
|
||||
gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
|
||||
|
||||
switch (gbAddrConfig.bits.NUM_PIPES)
|
||||
{
|
||||
case ADDR_CONFIG_1_PIPE:
|
||||
m_pipesLog2 = 0;
|
||||
break;
|
||||
case ADDR_CONFIG_2_PIPE:
|
||||
m_pipesLog2 = 1;
|
||||
break;
|
||||
case ADDR_CONFIG_4_PIPE:
|
||||
m_pipesLog2 = 2;
|
||||
break;
|
||||
case ADDR_CONFIG_8_PIPE:
|
||||
m_pipesLog2 = 3;
|
||||
break;
|
||||
case ADDR_CONFIG_16_PIPE:
|
||||
m_pipesLog2 = 4;
|
||||
break;
|
||||
case ADDR_CONFIG_32_PIPE:
|
||||
m_pipesLog2 = 5;
|
||||
break;
|
||||
case ADDR_CONFIG_64_PIPE:
|
||||
m_pipesLog2 = 6;
|
||||
break;
|
||||
default:
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
valid = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
|
||||
{
|
||||
case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
|
||||
m_pipeInterleaveLog2 = 8;
|
||||
break;
|
||||
case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
|
||||
m_pipeInterleaveLog2 = 9;
|
||||
break;
|
||||
case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
|
||||
m_pipeInterleaveLog2 = 10;
|
||||
break;
|
||||
case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
|
||||
m_pipeInterleaveLog2 = 11;
|
||||
break;
|
||||
default:
|
||||
ADDR_ASSERT_ALWAYS();
|
||||
valid = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
m_numSwizzleBits = ((m_pipesLog2 >= 3) ? m_pipesLog2 - 2 : 0);
|
||||
|
||||
// Gfx10+ chips treat packed 8-bit 422 formats as 32bpe with 2pix/elem.
|
||||
m_configFlags.use32bppFor422Fmt = TRUE;
|
||||
|
||||
if (valid)
|
||||
{
|
||||
InitEquationTable();
|
||||
InitBlockDimensionTable();
|
||||
}
|
||||
InitEquationTable();
|
||||
InitBlockDimensionTable();
|
||||
|
||||
return valid;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1579,10 +1470,10 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSlicePipeBankXor(
|
|||
pIn->slice,
|
||||
0);
|
||||
|
||||
const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
|
||||
const UINT_32 pipeBankXor = pipeBankXorOffset >> PipeInterleaveLog2;
|
||||
|
||||
// Should have no bit set under pipe interleave
|
||||
ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
|
||||
ADDR_ASSERT((pipeBankXor << PipeInterleaveLog2) == pipeBankXorOffset);
|
||||
|
||||
pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
|
||||
}
|
||||
|
|
@ -2043,7 +1934,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeStereoInfo(
|
|||
UINT_32 yPosMask = 0;
|
||||
|
||||
// First get "max y bit"
|
||||
for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
|
||||
for (UINT_32 i = PipeInterleaveLog2; i < blkSizeLog2; i++)
|
||||
{
|
||||
ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
|
||||
|
||||
|
|
@ -2055,7 +1946,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeStereoInfo(
|
|||
}
|
||||
|
||||
// Then loop again for populating a position mask of "max Y bit"
|
||||
for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
|
||||
for (UINT_32 i = PipeInterleaveLog2; i < blkSizeLog2; i++)
|
||||
{
|
||||
if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
|
||||
(m_equationTable[eqIndex].addr[i].index == yMax))
|
||||
|
|
@ -2074,7 +1965,7 @@ ADDR_E_RETURNCODE Gfx12Lib::HwlComputeStereoInfo(
|
|||
|
||||
if ((alignedHeight >> yMax) & 1)
|
||||
{
|
||||
*pRightXor = yPosMask >> m_pipeInterleaveLog2;
|
||||
*pRightXor = yPosMask >> PipeInterleaveLog2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
************************************************************************************************************************
|
||||
*
|
||||
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2022-2026 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
***********************************************************************************************************************/
|
||||
|
|
@ -147,10 +147,6 @@ private:
|
|||
static const UINT_32 MaxImageDim = 32768; // Max image size is 32k
|
||||
static const UINT_32 MaxMipLevels = 16;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
|
||||
const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
|
||||
ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const override;
|
||||
|
||||
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) override;
|
||||
|
||||
virtual ADDR_E_RETURNCODE HwlComputeStereoInfo(
|
||||
|
|
@ -172,8 +168,6 @@ private:
|
|||
const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
|
||||
UINT_32 regionCount) const override;
|
||||
|
||||
UINT_32 m_numSwizzleBits;
|
||||
|
||||
// Initialize equation table
|
||||
VOID InitEquationTable();
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue