amdgpu: reserve half of the addr space to implement a workaround for PRT

On GFX6-GFX12, except GFX9, SMEM loads on NULL PRT pages fault and hang
the GPU. The best workaround we found is to split the address space into
two equal parts, "LOW" and "HIGH". Here are more details:

To work around that, the driver splits the total VA space in half, so
that a single bit controls whether an address is in the "HIGH" or the
"LOW" address space. Every sparse residency buffer allocation that might
be used with SMEM gets two allocations:

- the "HIGH" address space is mapped normally and its VA is returned
  to the application.
- the "LOW" address space is explicitly mapped to a zero-initialized
  buffer when it's allocated or when it's unmapped.

Other buffer allocations are always allocated in the "LOW" address space.

The driver also creates a zero-initialized BO that is used to map
partially resident buffers at creation and when they are explicitly
unmapped. The size is arbitrary, but allocating 8 MiB seems perfectly
reasonable and fast enough in most cases. This BO is marked as
read-only for the GPU because the backend compilers don't use SMEM
stores.

For example, this makes the "LOW" half of the HIGH VA range look like
[0xffff800100000000,0xffffbfffffbfe000], while the "HIGH" half is left
for PRT.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Samuel Pitoiset 2026-04-09 18:20:10 +02:00
parent 8de45ef60d
commit 8cc2d09c29
6 changed files with 171 additions and 9 deletions

View file

@ -76,7 +76,9 @@ amdgpu_query_video_caps_info
amdgpu_read_mm_registers
amdgpu_va_manager_alloc
amdgpu_va_manager_init
amdgpu_va_manager_init2
amdgpu_va_manager_deinit
amdgpu_va_manager_query_sw_info
amdgpu_va_range_alloc
amdgpu_va_range_alloc2
amdgpu_va_range_free

View file

@ -105,6 +105,19 @@ enum amdgpu_gpu_va_range
enum amdgpu_sw_info {
amdgpu_sw_info_address32_hi = 0,
/** Query the PRT workaround control bit: the index of the virtual
 * address bit that selects between the "LOW" and "HIGH" halves when
 * half of the VA range was reserved for PRT at device initialization.
 * Whether the range is split depends on the GFX version.
 * The value is returned as a uint32_t; a returned value of ~0 means no
 * control bit was set and must be ignored.
 */
amdgpu_sw_info_address_prt_wa_control_bit = 1,
};
/** Information that can be queried with amdgpu_va_manager_query_sw_info(). */
enum amdgpu_va_manager_sw_info {
/** Query the PRT workaround control bit: the index of the virtual
 * address bit that selects between the "LOW" and "HIGH" halves when
 * half of the VA range was reserved for PRT with
 * AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT.
 * The value is returned as a uint32_t; the default value of ~0 means
 * the range was not split and must not be treated as a valid bit.
 */
amdgpu_va_manager_sw_info_address_prt_wa_control_bit = 0,
};
/*--------------------------------------------------------------------------*/
@ -1467,6 +1480,14 @@ void amdgpu_va_manager_init(amdgpu_va_manager_handle va_mgr,
uint64_t high_va_offset, uint64_t high_va_max,
uint32_t virtual_address_alignment);
/** Flag for amdgpu_va_manager_init2(): reserve half of the VA range for PRT.
 * When set, the end of the high VA range (or of the low VA range when
 * \p high_va_max is 0) is computed by flipping a single "control" bit of
 * the range maximum. That bit can be queried afterwards with
 * amdgpu_va_manager_query_sw_info().
 */
#define AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT 0x1
/**
 * Initialize a VA manager, optionally altering the layout with flags.
 *
 * Takes the same range arguments as amdgpu_va_manager_init(), which
 * forwards to this function with \p flags set to 0.
 *
 * \param   va_mgr                    - \c [in] VA manager to initialize
 * \param   low_va_offset             - \c [in] Start of the low VA range
 * \param   low_va_max                - \c [in] End of the low VA range
 * \param   high_va_offset            - \c [in] Start of the high VA range
 * \param   high_va_max               - \c [in] End of the high VA range
 * \param   virtual_address_alignment - \c [in] Alignment passed to each
 *                                      range manager
 * \param   flags                     - \c [in] Bitmask of AMDGPU_VA_MGR_*
 */
void amdgpu_va_manager_init2(struct amdgpu_va_manager *va_mgr,
uint64_t low_va_offset, uint64_t low_va_max,
uint64_t high_va_offset, uint64_t high_va_max,
uint32_t virtual_address_alignment,
uint32_t flags);
void amdgpu_va_manager_deinit(amdgpu_va_manager_handle va_mgr);
/**
@ -1484,6 +1505,21 @@ int amdgpu_va_range_alloc2(amdgpu_va_manager_handle va_mgr,
amdgpu_va_handle *va_range_handle,
uint64_t flags);
/**
 * Query VA manager information.
 *
 * \param   va_mgr - \c [in] VA manager
 * \param   info   - \c [in] amdgpu_va_manager_sw_info_*
 * \param   value  - \c [out] Pointer to the return value. For
 *                   amdgpu_va_manager_sw_info_address_prt_wa_control_bit
 *                   the result is written as a uint32_t.
 *
 * \return  0 on success\n
 *         <0 - Negative POSIX error code (-EINVAL when \p info is not a
 *              known amdgpu_va_manager_sw_info_* value)
 *
 */
int amdgpu_va_manager_query_sw_info(struct amdgpu_va_manager *va_mgr,
enum amdgpu_va_manager_sw_info info,
void *value);
/**
* VA mapping/unmapping for the buffer object
*

View file

@ -144,6 +144,39 @@ static void amdgpu_device_reference(struct amdgpu_device **dst,
*dst = src;
}
/**
 * Query the major version of the GFX IP block of a device.
 *
 * \param   dev                  - \c [in] Device handle
 * \param   gfx_ip_version_major - \c [out] Major GFX IP version; set to 0
 *                                 when the device reports no GFX IP or
 *                                 when a query fails
 *
 * \return  0 on success (including "no GFX IP"), otherwise the error
 *          returned by the failing query
 */
static int amdgpu_query_gfx_level_major(amdgpu_device_handle dev,
					uint8_t *gfx_ip_version_major)
{
	struct drm_amdgpu_info_hw_ip gfx_info;
	uint32_t num_gfx_ips = 0;
	int ret;

	/* Start from a known value so every early exit leaves 0 behind. */
	*gfx_ip_version_major = 0;

	ret = amdgpu_query_hw_ip_count(dev, AMDGPU_HW_IP_GFX, &num_gfx_ips);
	if (ret)
		return ret;

	/* No graphics support: not an error, the version simply stays 0. */
	if (!num_gfx_ips)
		return 0;

	memset(&gfx_info, 0, sizeof(gfx_info));
	ret = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &gfx_info);
	if (ret)
		return ret;

	/* GFX6-8 don't set ip_discovery_version, so fall back to the legacy
	 * hw_ip_version_major field whenever the discovery version is
	 * unavailable (DRM minor version below 48, or reported as 0).
	 */
	if (dev->minor_version < 48 || !gfx_info.ip_discovery_version)
		*gfx_ip_version_major = gfx_info.hw_ip_version_major;
	else
		*gfx_ip_version_major = (gfx_info.ip_discovery_version >> 16) & 0xff;

	return 0;
}
static int _amdgpu_device_initialize(int fd,
uint32_t *major_version,
uint32_t *minor_version,
@ -151,11 +184,13 @@ static int _amdgpu_device_initialize(int fd,
bool deduplicate_device)
{
struct amdgpu_device *dev = NULL;
uint8_t gfx_ip_version_major = 0;
drmVersionPtr version;
int r;
int flag_auth = 0;
int flag_authexist=0;
uint32_t accel_working = 0;
uint32_t va_mgr_flags = 0;
*device_handle = NULL;
@ -244,12 +279,25 @@ static int _amdgpu_device_initialize(int fd,
goto cleanup;
}
amdgpu_va_manager_init(&dev->va_mgr,
dev->dev_info.virtual_address_offset,
dev->dev_info.virtual_address_max,
dev->dev_info.high_va_offset,
dev->dev_info.high_va_max,
dev->dev_info.virtual_address_alignment);
r = amdgpu_query_gfx_level_major(dev, &gfx_ip_version_major);
if (!r) {
/* Split the HIGH addr space for GFX6-GFX12, except GFX9 to
* implement a workaround for SMEM loads with NULL PRT pages.
* This is silently ignored if querying the GFX level failed.
*/
if (gfx_ip_version_major >= 6 && gfx_ip_version_major <= 12 &&
gfx_ip_version_major != 9) {
va_mgr_flags |= AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT;
}
}
amdgpu_va_manager_init2(&dev->va_mgr,
dev->dev_info.virtual_address_offset,
dev->dev_info.virtual_address_max,
dev->dev_info.high_va_offset,
dev->dev_info.high_va_max,
dev->dev_info.virtual_address_alignment,
va_mgr_flags);
amdgpu_parse_asic_ids(dev);
@ -322,6 +370,9 @@ drm_public int amdgpu_query_sw_info(amdgpu_device_handle dev,
else
*val32 = (dev->va_mgr.vamgr_32.va_max - 1) >> 32;
return 0;
case amdgpu_sw_info_address_prt_wa_control_bit:
*val32 = dev->va_mgr.address_prt_wa_control_bit;
return 0;
}
return -EINVAL;
}

View file

@ -72,6 +72,11 @@ struct amdgpu_va_manager {
struct amdgpu_bo_va_mgr vamgr_high;
/** The VA manager for the 32bit high address space */
struct amdgpu_bo_va_mgr vamgr_high_32;
/** The bit that selects between the "LOW" and "HIGH" halves when
* half of the address space is reserved for PRT to implement a SW
* workaround. Set to ~0 when the address space is not split. */
unsigned address_prt_wa_control_bit;
};
struct amdgpu_device {

View file

@ -323,19 +323,42 @@ drm_public amdgpu_va_manager_handle amdgpu_va_manager_alloc(void)
}
/**
 * Initialize a VA manager with no optional flags.
 *
 * Forwards all arguments unchanged to amdgpu_va_manager_init2() and
 * passes 0 for its flags argument.
 */
drm_public void amdgpu_va_manager_init(struct amdgpu_va_manager *va_mgr,
uint64_t low_va_offset, uint64_t low_va_max,
uint64_t high_va_offset, uint64_t high_va_max,
uint32_t virtual_address_alignment)
{
/* flags == 0: no AMDGPU_VA_MGR_* behaviour is requested. */
amdgpu_va_manager_init2(va_mgr, low_va_offset, low_va_max,
high_va_offset, high_va_max,
virtual_address_alignment, 0);
}
drm_public void amdgpu_va_manager_init2(struct amdgpu_va_manager *va_mgr,
uint64_t low_va_offset, uint64_t low_va_max,
uint64_t high_va_offset, uint64_t high_va_max,
uint32_t virtual_address_alignment)
uint32_t virtual_address_alignment,
uint32_t flags)
{
uint64_t start, max;
va_mgr->address_prt_wa_control_bit = ~0;
start = low_va_offset;
max = MIN2(low_va_max, 0x100000000ULL);
amdgpu_vamgr_init(&va_mgr->vamgr_32, start, max,
virtual_address_alignment);
start = max;
max = MAX2(low_va_max, 0x100000000ULL);
if ((flags & AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT) && !high_va_max) {
/* Reserve the half VA range for PRT by splitting it in two
* equal halves where one bit controls whether it's the LOW or
* HIGH half.
*/
va_mgr->address_prt_wa_control_bit = util_last_bit64(low_va_offset ^ low_va_max) - 1;
max = low_va_max ^ (1ull << va_mgr->address_prt_wa_control_bit);
} else {
max = MAX2(low_va_max, 0x100000000ULL);
}
amdgpu_vamgr_init(&va_mgr->vamgr_low, start, max,
virtual_address_alignment);
@ -345,7 +368,17 @@ drm_public void amdgpu_va_manager_init(struct amdgpu_va_manager *va_mgr,
virtual_address_alignment);
start = max;
max = MAX2(high_va_max, (start & ~0xffffffffULL) + 0x100000000ULL);
if ((flags & AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT) && high_va_max) {
/* Reserve the half VA range for PRT by splitting it in two
* equal halves where one bit controls whether it's the LOW or
* HIGH half.
*/
va_mgr->address_prt_wa_control_bit = util_last_bit64(high_va_offset ^ high_va_max) - 1;
max = high_va_max ^ (1ull << va_mgr->address_prt_wa_control_bit);
} else {
max = MAX2(high_va_max, (start & ~0xffffffffULL) + 0x100000000ULL);
}
amdgpu_vamgr_init(&va_mgr->vamgr_high, start, max,
virtual_address_alignment);
}
@ -357,3 +390,17 @@ drm_public void amdgpu_va_manager_deinit(struct amdgpu_va_manager *va_mgr)
amdgpu_vamgr_deinit(&va_mgr->vamgr_high_32);
amdgpu_vamgr_deinit(&va_mgr->vamgr_high);
}
/**
 * Query software information from a VA manager.
 *
 * \param   va_mgr - \c [in] VA manager to query
 * \param   info   - \c [in] amdgpu_va_manager_sw_info_* selector
 * \param   value  - \c [out] Destination for the result; written as a
 *                   uint32_t for
 *                   amdgpu_va_manager_sw_info_address_prt_wa_control_bit
 *
 * \return  0 on success, -EINVAL when \p va_mgr or \p value is NULL or
 *          when \p info is not a known selector
 */
drm_public int amdgpu_va_manager_query_sw_info(struct amdgpu_va_manager *va_mgr,
					       enum amdgpu_va_manager_sw_info info,
					       void *value)
{
	uint32_t *val32 = value;

	/* This is a public entry point taking an opaque output pointer:
	 * reject NULL arguments instead of dereferencing them.
	 */
	if (!va_mgr || !value)
		return -EINVAL;

	switch (info) {
	case amdgpu_va_manager_sw_info_address_prt_wa_control_bit:
		/* ~0 here means the VA range was not split for PRT. */
		*val32 = va_mgr->address_prt_wa_control_bit;
		return 0;
	}

	/* Unknown selector. */
	return -EINVAL;
}

View file

@ -31,4 +31,25 @@
#define __align_mask(value, mask) (((value) + (mask)) & ~(mask))
#define ALIGN(value, alignment) __align_mask(value, (__typeof__(value))((alignment) - 1))
/**
 * Find the most significant set bit of a 64-bit value.
 *
 * \param   u - value to scan
 *
 * \return  1-based position of the highest set bit (1 for bit 0, 64 for
 *          bit 63), or 0 when \p u is 0
 */
static inline unsigned
util_last_bit64(uint64_t u)
{
#if defined(HAVE___BUILTIN_CLZLL)
	/* __builtin_clzll() is undefined for 0, so handle it up front. */
	if (!u)
		return 0;
	return 64 - __builtin_clzll(u);
#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
	unsigned long msb;

	/* _BitScanReverse64() returns 0 when no bit is set. */
	return _BitScanReverse64(&msb, u) ? msb + 1 : 0;
#else
	unsigned nbits;

	/* Portable fallback: count how many shifts empty the value. */
	for (nbits = 0; u != 0; u >>= 1)
		nbits++;
	return nbits;
#endif
}
#endif /*_UTIL_MATH_H_*/