radv: Return VK_ERROR_INCOMPATIBLE_DRIVER for unsupported devices
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

VK_ERROR_INITIALIZATION_FAILED will fail physical device enumeration.
Returning VK_ERROR_INCOMPATIBLE_DRIVER means that the driver can still
be used on supported GPUs when multiple GPUs are installed.

cc: mesa-stable

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34783>
This commit is contained in:
Konstantin Seurer 2025-05-01 09:23:49 +02:00
parent 5d72ebf3e7
commit 84b9c281fe
6 changed files with 80 additions and 63 deletions

View file

@ -513,8 +513,9 @@ static void handle_env_var_force_family(struct radeon_info *info)
exit(1);
}
bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info)
enum ac_query_gpu_info_result
ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info)
{
struct amdgpu_gpu_info amdinfo;
struct drm_amdgpu_info_device device_info = {0};
@ -538,7 +539,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
if (!ac_query_pci_bus_info(fd, info)) {
if (require_pci_bus_info)
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
assert(info->drm_major == 3);
@ -548,27 +549,27 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
fprintf(stderr, "amdgpu: DRM version is %u.%u.%u, but this driver is "
"only compatible with 3.42.0 (kernel 5.15+) or later.\n",
info->drm_major, info->drm_minor, info->drm_patchlevel);
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
uint64_t cap;
r = drmGetCap(fd, DRM_CAP_SYNCOBJ, &cap);
if (r != 0 || cap == 0) {
fprintf(stderr, "amdgpu: syncobj support is missing but is required.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
/* Query hardware and driver information. */
r = ac_drm_query_gpu_info(dev, &amdinfo);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_gpu_info failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(device_info), &device_info);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_info(dev_info) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
info->userq_ip_mask = device_info.userq_ip_mask;
@ -650,35 +651,35 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
/* Only require gfx or compute. */
if (!info->ip[AMD_IP_GFX].num_queues && !info->ip[AMD_IP_COMPUTE].num_queues) {
fprintf(stderr, "amdgpu: failed to find gfx or compute.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, &info->me_fw_version,
&info->me_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(me) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_MEC, 0, 0, &info->mec_fw_version,
&info->mec_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(mec) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0, &info->pfp_fw_version,
&info->pfp_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(pfp) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
if (info->ip[AMD_IP_VCN_DEC].num_queues || info->ip[AMD_IP_VCN_UNIFIED].num_queues) {
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_VCN, 0, 0, &vidip_fw_version, &vidip_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(vcn) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
} else {
info->vcn_dec_version = (vidip_fw_version & 0x0F000000) >> 24;
info->vcn_enc_major_version = (vidip_fw_version & 0x00F00000) >> 20;
@ -689,7 +690,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0, &vidip_fw_version, &vidip_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(vce) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
} else
info->vce_fw_version = vidip_fw_version;
}
@ -698,7 +699,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0, &vidip_fw_version, &vidip_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(uvd) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
} else
info->uvd_fw_version = vidip_fw_version;
}
@ -707,7 +708,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_sw_info(dev, amdgpu_sw_info_address32_hi, &info->address32_hi);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
struct drm_amdgpu_memory_info meminfo = {0};
@ -715,7 +716,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_info(memory) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
/* Note: usable_heap_size values can be random and can't be relied on. */
@ -854,7 +855,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
else {
fprintf(stderr, "amdgpu: Unknown gfx version: %u.%u\n",
info->ip[AMD_IP_GFX].ver_major, info->ip[AMD_IP_GFX].ver_minor);
return false;
return AC_QUERY_GPU_INFO_UNIMPLEMENTED_HW;
}
info->family_id = device_info.family;
@ -869,7 +870,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
if (!info->name) {
fprintf(stderr, "amdgpu: unknown (family_id, chip_external_rev): (%u, %u)\n",
device_info.family, device_info.external_rev);
return false;
return AC_QUERY_GPU_INFO_UNIMPLEMENTED_HW;
}
memset(info->lowercase_name, 0, sizeof(info->lowercase_name));
@ -1703,7 +1704,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_uq_fw_area_info(dev, AMDGPU_HW_IP_GFX, 0, &fw_info);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_uq_fw_area_info() failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
info->has_fw_based_shadowing = true;
@ -1848,7 +1849,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
exit(0);
}
}
return true;
return AC_QUERY_GPU_INFO_SUCCESS;
}
void ac_compute_driver_uuid(char *uuid, size_t size)

View file

@ -338,8 +338,14 @@ struct radeon_info {
bool has_image_bvh_intersect_ray;
};
bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info);
/**
 * Result codes returned by ac_query_gpu_info().
 *
 * Anything other than AC_QUERY_GPU_INFO_SUCCESS means the radeon_info
 * structure must not be relied upon.
 */
enum ac_query_gpu_info_result {
   /* All queries succeeded and the GPU was recognized. */
   AC_QUERY_GPU_INFO_SUCCESS = 0,

   /* A required kernel/driver query failed or a requirement is missing
    * (e.g. DRM version too old, no syncobj support, firmware or memory
    * info query failure).
    */
   AC_QUERY_GPU_INFO_FAIL = 1,

   /* The device was queried, but its gfx version or
    * (family_id, chip_external_rev) pair is not known to this driver.
    */
   AC_QUERY_GPU_INFO_UNIMPLEMENTED_HW = 2,
};
enum ac_query_gpu_info_result ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info);
void ac_compute_driver_uuid(char *uuid, size_t size);

View file

@ -2071,19 +2071,23 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
#ifdef _WIN32
pdev->ws = radv_null_winsys_create();
if (!pdev->ws)
result = VK_ERROR_OUT_OF_HOST_MEMORY;
#else
if (drm_device) {
bool reserve_vmid = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
pdev->ws =
radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid, is_virtio);
result = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid, is_virtio,
&pdev->ws);
} else {
pdev->ws = radv_null_winsys_create();
if (!pdev->ws)
result = VK_ERROR_OUT_OF_HOST_MEMORY;
}
#endif
if (!pdev->ws) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
if (result != VK_SUCCESS) {
result = vk_errorf(instance, result, "failed to initialize winsys");
goto fail_base;
}

View file

@ -22,31 +22,6 @@
#include "vk_drm_syncobj.h"
#include "xf86drm.h"
/**
 * Query GPU information for a winsys and apply winsys-level overrides.
 *
 * Fills ws->info through ac_query_gpu_info() (PCI bus info is required),
 * then limits the video IP types to a single submitted IB, clamps the SDMA
 * and compute queue counts to MAX_RINGS_PER_TYPE and enables use_ib_bos.
 *
 * Returns false if ac_query_gpu_info() fails, true otherwise.
 */
static bool
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
{
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, true))
return false;
/*
* Override the max submits on video queues.
* If you submit multiple session contexts in the same IB sequence the
* hardware gets upset as it expects a kernel fence to be emitted to reset
* the session context in the hardware.
* Avoid this problem by never submitting more than one IB at a time.
* This possibly should be fixed in the kernel, and if it is this can be
* resolved.
*/
for (enum amd_ip_type ip_type = AMD_IP_UVD; ip_type <= AMD_IP_VCN_ENC; ip_type++)
ws->info.max_submitted_ibs[ip_type] = 1;
/* Never expose more SDMA/compute queues than the winsys can track. */
ws->info.ip[AMD_IP_SDMA].num_queues = MIN2(ws->info.ip[AMD_IP_SDMA].num_queues, MAX_RINGS_PER_TYPE);
ws->info.ip[AMD_IP_COMPUTE].num_queues = MIN2(ws->info.ip[AMD_IP_COMPUTE].num_queues, MAX_RINGS_PER_TYPE);
ws->use_ib_bos = true;
return true;
}
static void
radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_info)
{
@ -195,9 +170,12 @@ radv_amdgpu_winsys_get_sync_types(struct radeon_winsys *rws)
return ws->sync_types;
}
struct radeon_winsys *
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, bool reserve_vmid, bool is_virtio)
VkResult
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, bool reserve_vmid, bool is_virtio,
struct radeon_winsys **winsys)
{
VkResult result = VK_SUCCESS;
uint32_t drm_major, drm_minor, r;
ac_drm_device *dev;
struct radv_amdgpu_winsys *ws = NULL;
@ -205,7 +183,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
r = ac_drm_device_initialize(fd, is_virtio, &drm_major, &drm_minor, &dev);
if (r) {
fprintf(stderr, "radv/amdgpu: failed to initialize device.\n");
return NULL;
return VK_ERROR_INITIALIZATION_FAILED;
}
/* We have to keep this lock till insertion. */
@ -214,6 +192,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
winsyses = _mesa_pointer_hash_table_create(NULL);
if (!winsyses) {
fprintf(stderr, "radv/amdgpu: failed to alloc winsys hash table.\n");
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
@ -232,19 +211,22 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
((debug_flags & RADV_DEBUG_HANG) && !ws->debug_log_bos) ||
((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) || (perftest_flags != ws->perftest)) {
fprintf(stderr, "radv/amdgpu: Found options that differ from the existing winsys.\n");
return NULL;
return VK_ERROR_INITIALIZATION_FAILED;
}
/* RADV_DEBUG_ZERO_VRAM is the only option that is allowed to be set again. */
if (debug_flags & RADV_DEBUG_ZERO_VRAM)
ws->zero_all_vram_allocs = true;
return &ws->base;
*winsys = &ws->base;
return VK_SUCCESS;
}
ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
if (!ws)
if (!ws) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
ws->refcount = 1;
ws->dev = dev;
@ -252,8 +234,29 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
ws->info.drm_major = drm_major;
ws->info.drm_minor = drm_minor;
ws->info.is_virtio = is_virtio;
if (!do_winsys_init(ws, fd))
enum ac_query_gpu_info_result info_result = ac_query_gpu_info(fd, ws->dev, &ws->info, true);
if (info_result != AC_QUERY_GPU_INFO_SUCCESS) {
result = info_result == AC_QUERY_GPU_INFO_FAIL ? VK_ERROR_INITIALIZATION_FAILED : VK_ERROR_INCOMPATIBLE_DRIVER;
goto winsys_fail;
}
/*
* Override the max submits on video queues.
* If you submit multiple session contexts in the same IB sequence the
* hardware gets upset as it expects a kernel fence to be emitted to reset
* the session context in the hardware.
* Avoid this problem by never submitting more than one IB at a time.
* This possibly should be fixed in the kernel, and if it is this can be
* resolved.
*/
for (enum amd_ip_type ip_type = AMD_IP_UVD; ip_type <= AMD_IP_VCN_ENC; ip_type++)
ws->info.max_submitted_ibs[ip_type] = 1;
ws->info.ip[AMD_IP_SDMA].num_queues = MIN2(ws->info.ip[AMD_IP_SDMA].num_queues, MAX_RINGS_PER_TYPE);
ws->info.ip[AMD_IP_COMPUTE].num_queues = MIN2(ws->info.ip[AMD_IP_COMPUTE].num_queues, MAX_RINGS_PER_TYPE);
ws->use_ib_bos = true;
ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
@ -265,6 +268,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
r = ac_drm_vm_reserve_vmid(ws->dev, 0);
if (r) {
fprintf(stderr, "radv/amdgpu: failed to reserve vmid.\n");
result = VK_ERROR_INITIALIZATION_FAILED;
goto winsys_fail;
}
}
@ -311,7 +315,9 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
_mesa_hash_table_insert(winsyses, (void *)ac_drm_device_get_cookie(dev), ws);
simple_mtx_unlock(&winsys_creation_mutex);
return &ws->base;
*winsys = &ws->base;
return result;
winsys_fail:
free(ws);
@ -322,5 +328,5 @@ fail:
}
simple_mtx_unlock(&winsys_creation_mutex);
ac_drm_device_deinitialize(dev);
return NULL;
return result;
}

View file

@ -12,8 +12,8 @@
#ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
#define RADV_AMDGPU_WINSYS_PUBLIC_H
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
bool reserve_vmid, bool is_virtio);
VkResult radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, bool reserve_vmid,
bool is_virtio, struct radeon_winsys **winsys);
struct radeon_winsys *radv_dummy_winsys_create(void);

View file

@ -36,7 +36,7 @@ static bool do_winsys_init(struct amdgpu_winsys *aws,
const struct pipe_screen_config *config,
int fd)
{
if (!ac_query_gpu_info(fd, aws->dev, &aws->info, false))
if (ac_query_gpu_info(fd, aws->dev, &aws->info, false) != AC_QUERY_GPU_INFO_SUCCESS)
goto fail;
aws->addrlib = ac_addrlib_create(&aws->info, &aws->info.max_alignment);