diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt index 8cd5559c..60c7d065 100644 --- a/amdgpu/amdgpu-symbols.txt +++ b/amdgpu/amdgpu-symbols.txt @@ -76,7 +76,9 @@ amdgpu_query_video_caps_info amdgpu_read_mm_registers amdgpu_va_manager_alloc amdgpu_va_manager_init +amdgpu_va_manager_init2 amdgpu_va_manager_deinit +amdgpu_va_manager_query_sw_info amdgpu_va_range_alloc amdgpu_va_range_alloc2 amdgpu_va_range_free diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 6fb2b60b..6efb7a7d 100644 --- a/amdgpu/amdgpu.h +++ b/amdgpu/amdgpu.h @@ -105,6 +105,19 @@ enum amdgpu_gpu_va_range enum amdgpu_sw_info { amdgpu_sw_info_address32_hi = 0, + /** Query the PRT control bit when the half VA range is reserved for + * PRT when the device is initialized. This depends on the GFX version. + * A return value of ~0 should be ignored. + */ + amdgpu_sw_info_address_prt_wa_control_bit = 1, +}; + +enum amdgpu_va_manager_sw_info { + /** Query the PRT control bit when the half VA range is reserved for + * PRT with AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT. The default value of + * ~0 shouldn't be considered a valid value. + */ + amdgpu_va_manager_sw_info_address_prt_wa_control_bit = 0, }; /*--------------------------------------------------------------------------*/ @@ -1467,6 +1480,14 @@ void amdgpu_va_manager_init(amdgpu_va_manager_handle va_mgr, uint64_t high_va_offset, uint64_t high_va_max, uint32_t virtual_address_alignment); +#define AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT 0x1 + +void amdgpu_va_manager_init2(struct amdgpu_va_manager *va_mgr, + uint64_t low_va_offset, uint64_t low_va_max, + uint64_t high_va_offset, uint64_t high_va_max, + uint32_t virtual_address_alignment, + uint32_t flags); + void amdgpu_va_manager_deinit(amdgpu_va_manager_handle va_mgr); /** @@ -1484,6 +1505,21 @@ int amdgpu_va_range_alloc2(amdgpu_va_manager_handle va_mgr, amdgpu_va_handle *va_range_handle, uint64_t flags); +/** + * Query VA manager information. + * + * \param va_mgr - \c [in] VA manager + * \param info - \c [in] amdgpu_va_manager_sw_info_* + * \param value - \c [out] Pointer to the return value. + * + * \return 0 on success\n + * <0 - Negative POSIX error code + * +*/ +int amdgpu_va_manager_query_sw_info(struct amdgpu_va_manager *va_mgr, + enum amdgpu_va_manager_sw_info info, + void *value); + /** * VA mapping/unmapping for the buffer object * diff --git a/amdgpu/amdgpu_device.c b/amdgpu/amdgpu_device.c index 34463b53..b7d5589a 100644 --- a/amdgpu/amdgpu_device.c +++ b/amdgpu/amdgpu_device.c @@ -144,6 +144,39 @@ static void amdgpu_device_reference(struct amdgpu_device **dst, *dst = src; } +static int amdgpu_query_gfx_level_major(amdgpu_device_handle dev, + uint8_t *gfx_ip_version_major) +{ + struct drm_amdgpu_info_hw_ip ip_info; + uint32_t gfx_ip_count = 0; + int r; + + *gfx_ip_version_major = 0; + + r = amdgpu_query_hw_ip_count(dev, AMDGPU_HW_IP_GFX, &gfx_ip_count); + if (r) + return r; + + /* No graphics support. */ + if (gfx_ip_count == 0) + return 0; + + memset(&ip_info, 0, sizeof(ip_info)); + + r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &ip_info); + if (r) + return r; + + /* GFX6-8 don't set ip_discovery_version. */ + if (dev->minor_version >= 48 && ip_info.ip_discovery_version) { + *gfx_ip_version_major = (ip_info.ip_discovery_version >> 16) & 0xff; + } else { + *gfx_ip_version_major = ip_info.hw_ip_version_major; + } + + return r; +} + static int _amdgpu_device_initialize(int fd, uint32_t *major_version, uint32_t *minor_version, @@ -151,11 +184,13 @@ static int _amdgpu_device_initialize(int fd, bool deduplicate_device) { struct amdgpu_device *dev = NULL; + uint8_t gfx_ip_version_major = 0; drmVersionPtr version; int r; int flag_auth = 0; int flag_authexist=0; uint32_t accel_working = 0; + uint32_t va_mgr_flags = 0; *device_handle = NULL; @@ -244,12 +279,25 @@ static int _amdgpu_device_initialize(int fd, goto cleanup; } - amdgpu_va_manager_init(&dev->va_mgr, - dev->dev_info.virtual_address_offset, - dev->dev_info.virtual_address_max, - dev->dev_info.high_va_offset, - dev->dev_info.high_va_max, - dev->dev_info.virtual_address_alignment); + r = amdgpu_query_gfx_level_major(dev, &gfx_ip_version_major); + if (!r) { + /* Split the HIGH addr space for GFX6-GFX12, except GFX9 to + * implement a workaround for SMEM loads with NULL PRT pages. + * This is silently ignored if querying the GFX level failed. + */ + if (gfx_ip_version_major >= 6 && gfx_ip_version_major <= 12 && + gfx_ip_version_major != 9) { + va_mgr_flags |= AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT; + } + } + + amdgpu_va_manager_init2(&dev->va_mgr, + dev->dev_info.virtual_address_offset, + dev->dev_info.virtual_address_max, + dev->dev_info.high_va_offset, + dev->dev_info.high_va_max, + dev->dev_info.virtual_address_alignment, + va_mgr_flags); amdgpu_parse_asic_ids(dev); @@ -322,6 +370,9 @@ drm_public int amdgpu_query_sw_info(amdgpu_device_handle dev, else *val32 = (dev->va_mgr.vamgr_32.va_max - 1) >> 32; return 0; + case amdgpu_sw_info_address_prt_wa_control_bit: + *val32 = dev->va_mgr.address_prt_wa_control_bit; + return 0; } return -EINVAL; } diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h index af85b849..593b9f81 100644 --- a/amdgpu/amdgpu_internal.h +++ b/amdgpu/amdgpu_internal.h @@ -72,6 +72,11 @@ struct amdgpu_va_manager { struct amdgpu_bo_va_mgr vamgr_high; /** The VA manager for the 32bit high address space */ struct amdgpu_bo_va_mgr vamgr_high_32; + + /** The bit to control whether it's the "LOW" or "HIGH" halves, when + * half of the address space is reserved for PRT to implement a SW + * workaround. */ + unsigned address_prt_wa_control_bit; }; struct amdgpu_device { diff --git a/amdgpu/amdgpu_vamgr.c b/amdgpu/amdgpu_vamgr.c index 29944ec8..6e9cb654 100644 --- a/amdgpu/amdgpu_vamgr.c +++ b/amdgpu/amdgpu_vamgr.c @@ -323,19 +323,42 @@ drm_public amdgpu_va_manager_handle amdgpu_va_manager_alloc(void) } drm_public void amdgpu_va_manager_init(struct amdgpu_va_manager *va_mgr, + uint64_t low_va_offset, uint64_t low_va_max, + uint64_t high_va_offset, uint64_t high_va_max, + uint32_t virtual_address_alignment) +{ + amdgpu_va_manager_init2(va_mgr, low_va_offset, low_va_max, + high_va_offset, high_va_max, + virtual_address_alignment, 0); +} + +drm_public void amdgpu_va_manager_init2(struct amdgpu_va_manager *va_mgr, uint64_t low_va_offset, uint64_t low_va_max, uint64_t high_va_offset, uint64_t high_va_max, - uint32_t virtual_address_alignment) + uint32_t virtual_address_alignment, + uint32_t flags) { uint64_t start, max; + va_mgr->address_prt_wa_control_bit = ~0; + start = low_va_offset; max = MIN2(low_va_max, 0x100000000ULL); amdgpu_vamgr_init(&va_mgr->vamgr_32, start, max, virtual_address_alignment); start = max; - max = MAX2(low_va_max, 0x100000000ULL); + if ((flags & AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT) && !high_va_max) { + /* Reserve the half VA range for PRT by splitting it in two + * equal halves where one bit controls whether it's the LOW or + * HIGH half. + */ + va_mgr->address_prt_wa_control_bit = util_last_bit64(low_va_offset ^ low_va_max) - 1; + max = low_va_max ^ (1ull << va_mgr->address_prt_wa_control_bit); + } else { + max = MAX2(low_va_max, 0x100000000ULL); + } + amdgpu_vamgr_init(&va_mgr->vamgr_low, start, max, virtual_address_alignment); @@ -345,7 +368,17 @@ drm_public void amdgpu_va_manager_init(struct amdgpu_va_manager *va_mgr, virtual_address_alignment); start = max; - max = MAX2(high_va_max, (start & ~0xffffffffULL) + 0x100000000ULL); + if ((flags & AMDGPU_VA_MGR_RESERVE_HALF_VA_FOR_PRT) && high_va_max) { + /* Reserve the half VA range for PRT by splitting it in two + * equal halves where one bit controls whether it's the LOW or + * HIGH half. + */ + va_mgr->address_prt_wa_control_bit = util_last_bit64(high_va_offset ^ high_va_max) - 1; + max = high_va_max ^ (1ull << va_mgr->address_prt_wa_control_bit); + } else { + max = MAX2(high_va_max, (start & ~0xffffffffULL) + 0x100000000ULL); + } + amdgpu_vamgr_init(&va_mgr->vamgr_high, start, max, virtual_address_alignment); } @@ -357,3 +390,17 @@ drm_public void amdgpu_va_manager_deinit(struct amdgpu_va_manager *va_mgr) amdgpu_vamgr_deinit(&va_mgr->vamgr_high_32); amdgpu_vamgr_deinit(&va_mgr->vamgr_high); } + +drm_public int amdgpu_va_manager_query_sw_info(struct amdgpu_va_manager *va_mgr, + enum amdgpu_va_manager_sw_info info, + void *value) +{ + uint32_t *val32 = (uint32_t*)value; + + switch (info) { + case amdgpu_va_manager_sw_info_address_prt_wa_control_bit: + *val32 = va_mgr->address_prt_wa_control_bit; + return 0; + } + return -EINVAL; +} diff --git a/util_math.h b/util_math.h index 35bf4512..1799545b 100644 --- a/util_math.h +++ b/util_math.h @@ -31,4 +31,25 @@ #define __align_mask(value, mask) (((value) + (mask)) & ~(mask)) #define ALIGN(value, alignment) __align_mask(value, (__typeof__(value))((alignment) - 1)) +static inline unsigned +util_last_bit64(uint64_t u) +{ +#if defined(HAVE___BUILTIN_CLZLL) + return u == 0 ? 0 : 64 - __builtin_clzll(u); +#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64) + unsigned long index; + if (_BitScanReverse64(&index, u)) + return index + 1; + else + return 0; +#else + unsigned r = 0; + while (u) { + r++; + u >>= 1; + } + return r; +#endif +} + #endif /*_UTIL_MATH_H_*/