diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 80d370c6803..b2542707462 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -52,6 +52,7 @@ enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_REPLAYABLE = (1 << 11), RADEON_FLAG_DISCARDABLE = (1 << 12), RADEON_FLAG_GFX12_ALLOW_DCC = (1 << 13), + RADEON_FLAG_VM_UPDATE_WAIT = (1 << 14), }; enum radeon_ctx_priority { diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 0be2a3395f0..cdd2961557f 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -35,6 +35,8 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t uint32_t bo_flags, uint64_t internal_flags, uint32_t ops) { uint64_t flags = internal_flags; + int r; + if (bo_handle) { flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE; @@ -47,7 +49,35 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t size = align64(size, getpagesize()); - return ac_drm_bo_va_op_raw(ws->dev, bo_handle, offset, size, addr, flags, ops); + if (bo_flags & RADEON_FLAG_VM_UPDATE_WAIT) { + /* Wait for VM MAP updates when requested instead of delaying the updates at submit time. + * This is a workaround to mitigate application bugs like use-before-alloc. Note that there is + * still a very short period of time where the submit could start before the VM MAP updates + * are actually done, but this is deep UB territory. Also the BO VA will only be visible to the + * application after VM updates are done, so it should be safe in most scenarios. 
+ */ + assert(ops == AMDGPU_VA_OP_MAP); + + simple_mtx_lock(&ws->vm_ioctl_lock); + + uint64_t vm_timeline_point = ++ws->vm_timeline_seq_num; + + r = ac_drm_bo_va_op_raw2(ws->dev, bo_handle, offset, size, addr, flags, ops, ws->vm_timeline_syncobj, + vm_timeline_point, 0, 0); + + simple_mtx_unlock(&ws->vm_ioctl_lock); + + if (r) + return r; + + r = ac_drm_cs_syncobj_timeline_wait(ws->dev, &ws->vm_timeline_syncobj, &vm_timeline_point, 1, INT64_MAX, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + NULL); + } else { + r = ac_drm_bo_va_op_raw(ws->dev, bo_handle, offset, size, addr, flags, ops); + } + + return r; } static int diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index 2e2e6954e23..f193b95301a 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -148,6 +148,9 @@ radv_amdgpu_winsys_destroy(struct radeon_winsys *rws) u_rwlock_destroy(&ws->global_bo_list.lock); free(ws->global_bo_list.bos); + ac_drm_cs_destroy_syncobj(ws->dev, ws->vm_timeline_syncobj); + simple_mtx_destroy(&ws->vm_ioctl_lock); + if (ws->reserve_vmid) ac_drm_vm_unreserve_vmid(ws->dev, 0); @@ -323,6 +326,11 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, ws->sync_types[num_sync_types++] = NULL; assert(num_sync_types <= ARRAY_SIZE(ws->sync_types)); + if (ac_drm_cs_create_syncobj2(ws->dev, 0, &ws->vm_timeline_syncobj)) + goto winsys_fail; + + simple_mtx_init(&ws->vm_ioctl_lock, mtx_plain); + ws->perftest = perftest_flags; ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM; u_rwlock_init(&ws->global_bo_list.lock); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h index 37d7d32d8b3..78cfff68126 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h @@ -14,6 +14,7 @@ 
#include #include "util/list.h" #include "util/rwlock.h" +#include "util/simple_mtx.h" #include "ac_gpu_info.h" #include "ac_linux_drm.h" #include "radv_radeon_winsys.h" @@ -56,6 +57,10 @@ struct radv_amdgpu_winsys { struct vk_sync_type syncobj_sync_type; struct vk_sync_timeline_type emulated_timeline_sync_type; + simple_mtx_t vm_ioctl_lock; + uint32_t vm_timeline_syncobj; + uint64_t vm_timeline_seq_num; + uint32_t refcount; };