radv/amdgpu: add a way to wait for VM updates at alloc time

RADEON_FLAG_VM_UPDATE_WAIT can be passed to wait for VM updates at
allocation time instead of deferring them until submit time. There is no
reason to delay the waiting when the memory is bound to images/buffers
because in DX12 resources are allocated and bound immediately.

This flag will be used to work around a use-before-alloc in FH5
(a game bug) which causes GPU hangs.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38031>
This commit is contained in:
Samuel Pitoiset 2025-10-23 17:55:57 +02:00 committed by Marge Bot
parent bbaffa22ff
commit fc0cfaae1c
4 changed files with 45 additions and 1 deletions

View file

@ -52,6 +52,7 @@ enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_REPLAYABLE = (1 << 11),
RADEON_FLAG_DISCARDABLE = (1 << 12),
RADEON_FLAG_GFX12_ALLOW_DCC = (1 << 13),
RADEON_FLAG_VM_UPDATE_WAIT = (1 << 14),
};
enum radeon_ctx_priority {

View file

@ -35,6 +35,8 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
uint32_t bo_flags, uint64_t internal_flags, uint32_t ops)
{
uint64_t flags = internal_flags;
int r;
if (bo_handle) {
flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
@ -47,7 +49,35 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
size = align64(size, getpagesize());
return ac_drm_bo_va_op_raw(ws->dev, bo_handle, offset, size, addr, flags, ops);
if (bo_flags & RADEON_FLAG_VM_UPDATE_WAIT) {
/* Wait for VM MAP updates when requested instead of delaying the updates at submit time.
* This is a workaround to mitigate application bugs like use-before-alloc. Note that there is
* still a very short period of time where the submit could start before the VM MAP updates
* are actually done but this is deep UB territory. Also the BO VA will be only visible to the
* application after VM updates are done, so it should be safe in most scenarios.
*/
assert(ops == AMDGPU_VA_OP_MAP);
simple_mtx_lock(&ws->vm_ioctl_lock);
uint64_t vm_timeline_point = ++ws->vm_timeline_seq_num;
r = ac_drm_bo_va_op_raw2(ws->dev, bo_handle, offset, size, addr, flags, ops, ws->vm_timeline_syncobj,
vm_timeline_point, 0, 0);
simple_mtx_unlock(&ws->vm_ioctl_lock);
if (r)
return r;
r = ac_drm_cs_syncobj_timeline_wait(ws->dev, &ws->vm_timeline_syncobj, &vm_timeline_point, 1, INT64_MAX,
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
NULL);
} else {
r = ac_drm_bo_va_op_raw(ws->dev, bo_handle, offset, size, addr, flags, ops);
}
return r;
}
static int

View file

@ -148,6 +148,9 @@ radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
u_rwlock_destroy(&ws->global_bo_list.lock);
free(ws->global_bo_list.bos);
ac_drm_cs_destroy_syncobj(ws->dev, ws->vm_timeline_syncobj);
simple_mtx_destroy(&ws->vm_ioctl_lock);
if (ws->reserve_vmid)
ac_drm_vm_unreserve_vmid(ws->dev, 0);
@ -323,6 +326,11 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
ws->sync_types[num_sync_types++] = NULL;
assert(num_sync_types <= ARRAY_SIZE(ws->sync_types));
if (ac_drm_cs_create_syncobj2(ws->dev, 0, &ws->vm_timeline_syncobj))
goto winsys_fail;
simple_mtx_init(&ws->vm_ioctl_lock, mtx_plain);
ws->perftest = perftest_flags;
ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
u_rwlock_init(&ws->global_bo_list.lock);

View file

@ -14,6 +14,7 @@
#include <pthread.h>
#include "util/list.h"
#include "util/rwlock.h"
#include "util/simple_mtx.h"
#include "ac_gpu_info.h"
#include "ac_linux_drm.h"
#include "radv_radeon_winsys.h"
@ -56,6 +57,10 @@ struct radv_amdgpu_winsys {
struct vk_sync_type syncobj_sync_type;
struct vk_sync_timeline_type emulated_timeline_sync_type;
simple_mtx_t vm_ioctl_lock;
uint32_t vm_timeline_syncobj;
uint64_t vm_timeline_seq_num;
uint32_t refcount;
};