radv/amdgpu: add a way to wait for VM updates at alloc time

RADEON_FLAG_VM_UPDATE_WAIT can be passed to wait for VM updates at
allocation time instead of deferring them until submit time. There is no
reason to delay the waiting when the memory is bound to images/buffers
because in DX12 resources are allocated and bound immediately.

This flag will be used to work around a use-before-alloc in FH5
(a game bug) which causes GPU hangs.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38031>
This commit is contained in:
Samuel Pitoiset 2025-10-23 17:55:57 +02:00 committed by Marge Bot
parent bbaffa22ff
commit fc0cfaae1c
4 changed files with 45 additions and 1 deletions

View file

@ -52,6 +52,7 @@ enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_REPLAYABLE = (1 << 11),
RADEON_FLAG_DISCARDABLE = (1 << 12),
RADEON_FLAG_GFX12_ALLOW_DCC = (1 << 13),
RADEON_FLAG_VM_UPDATE_WAIT = (1 << 14),
};
enum radeon_ctx_priority {

View file

@ -35,6 +35,8 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
uint32_t bo_flags, uint64_t internal_flags, uint32_t ops)
{
uint64_t flags = internal_flags;
int r;
if (bo_handle) {
flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
@ -47,7 +49,35 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
size = align64(size, getpagesize());
return ac_drm_bo_va_op_raw(ws->dev, bo_handle, offset, size, addr, flags, ops);
if (bo_flags & RADEON_FLAG_VM_UPDATE_WAIT) {
/* Wait for VM MAP updates when requested instead of delaying the updates at submit time.
* This is a workaround to mitigate application bugs like use-before-alloc. Note that there is
* still a very short period of time where the submit could start before the VM MAP updates
* are actually done but this is deep UB territory. Also the BO VA will be only visible to the
* application after VM updates are done, so it should be safe in most scenarios.
*/
assert(ops == AMDGPU_VA_OP_MAP);
simple_mtx_lock(&ws->vm_ioctl_lock);
uint64_t vm_timeline_point = ++ws->vm_timeline_seq_num;
r = ac_drm_bo_va_op_raw2(ws->dev, bo_handle, offset, size, addr, flags, ops, ws->vm_timeline_syncobj,
vm_timeline_point, 0, 0);
simple_mtx_unlock(&ws->vm_ioctl_lock);
if (r)
return r;
r = ac_drm_cs_syncobj_timeline_wait(ws->dev, &ws->vm_timeline_syncobj, &vm_timeline_point, 1, INT64_MAX,
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
NULL);
} else {
r = ac_drm_bo_va_op_raw(ws->dev, bo_handle, offset, size, addr, flags, ops);
}
return r;
}
static int

View file

@ -148,6 +148,9 @@ radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
u_rwlock_destroy(&ws->global_bo_list.lock);
free(ws->global_bo_list.bos);
ac_drm_cs_destroy_syncobj(ws->dev, ws->vm_timeline_syncobj);
simple_mtx_destroy(&ws->vm_ioctl_lock);
if (ws->reserve_vmid)
ac_drm_vm_unreserve_vmid(ws->dev, 0);
@ -323,6 +326,11 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
ws->sync_types[num_sync_types++] = NULL;
assert(num_sync_types <= ARRAY_SIZE(ws->sync_types));
if (ac_drm_cs_create_syncobj2(ws->dev, 0, &ws->vm_timeline_syncobj))
goto winsys_fail;
simple_mtx_init(&ws->vm_ioctl_lock, mtx_plain);
ws->perftest = perftest_flags;
ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
u_rwlock_init(&ws->global_bo_list.lock);

View file

@ -14,6 +14,7 @@
#include <pthread.h>
#include "util/list.h"
#include "util/rwlock.h"
#include "util/simple_mtx.h"
#include "ac_gpu_info.h"
#include "ac_linux_drm.h"
#include "radv_radeon_winsys.h"
@ -56,6 +57,10 @@ struct radv_amdgpu_winsys {
struct vk_sync_type syncobj_sync_type;
struct vk_sync_timeline_type emulated_timeline_sync_type;
simple_mtx_t vm_ioctl_lock;
uint32_t vm_timeline_syncobj;
uint64_t vm_timeline_seq_num;
uint32_t refcount;
};