From a0a9f9dda143d2e4de5121ceb7e762545fc4fb33 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 26 May 2026 16:21:05 +0200 Subject: [PATCH] radv/amdgpu: rework tracking allocated memory for budget Deduplicating the winsys just for budget looks more like a hack than a real implementation. Reworking tracking allocated memory to remove the dedup. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 18 ++--- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 4 +- .../vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 72 ++++++++++++++++++- .../vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 19 ++++- 4 files changed, 96 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 222a50b3c30..1d7175df04b 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -388,14 +388,14 @@ radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) { if (bo->base.vram_no_cpu_access) { - p_atomic_add(&ws->allocated_vram, -align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_vram, -align64(bo->base.size, ws->info.gart_page_size)); } else { - p_atomic_add(&ws->allocated_vram_vis, -align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_vram_vis, -align64(bo->base.size, ws->info.gart_page_size)); } } if (bo->base.initial_domain & RADEON_DOMAIN_GTT) - p_atomic_add(&ws->allocated_gtt, -align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_gtt, -align64(bo->base.size, ws->info.gart_page_size)); ac_drm_va_range_free(bo->va_handle); FREE(bo); @@ -642,14 +642,14 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned * visible counter because they can be mapped. */ if (bo->base.vram_no_cpu_access) { - p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_vram, align64(bo->base.size, ws->info.gart_page_size)); } else { - p_atomic_add(&ws->allocated_vram_vis, align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_vram_vis, align64(bo->base.size, ws->info.gart_page_size)); } } if (initial_domain & RADEON_DOMAIN_GTT) - p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size)); if (ws->debug_all_bos) radv_amdgpu_global_bo_list_add(ws, bo); @@ -817,7 +817,7 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_ bo->cpu_map = NULL; bo->base.obj_id = (uintptr_t)(buf_handle.abo); - p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size)); if (ws->debug_all_bos) radv_amdgpu_global_bo_list_add(ws, bo); @@ -910,9 +910,9 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priori bo->base.obj_id = (uintptr_t)(result.bo.abo); if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) - p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_vram, align64(bo->base.size, ws->info.gart_page_size)); if (bo->base.initial_domain & RADEON_DOMAIN_GTT) - p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size)); + p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size)); if (ws->debug_all_bos) radv_amdgpu_global_bo_list_add(ws, bo); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 97ebb56e357..7c3de6517e3 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -203,8 +203,8 @@ radv_amdgpu_cs_domain(const struct radeon_winsys *_ws) { const struct radv_amdgpu_winsys *ws = (const struct radv_amdgpu_winsys *)_ws; - bool enough_vram = ws->info.all_vram_visible || - p_atomic_read_relaxed(&ws->allocated_vram_vis) * 2 <= (uint64_t)ws->info.vram_vis_size_kb * 1024; + bool enough_vram = ws->info.all_vram_visible || p_atomic_read_relaxed(&ws->alloc_tracker->allocated_vram_vis) * 2 <= + (uint64_t)ws->info.vram_vis_size_kb * 1024; /* Bandwidth should be equivalent to at least PCIe 3.0 x8. * If there is no PCIe info, assume there is enough bandwidth. diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index 5b56342b43d..6c57c4b4c46 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -19,6 +19,7 @@ #include "radv_amdgpu_winsys_public.h" #include "radv_debug.h" #include "vk_drm_syncobj.h" +#include "util/hash_table.h" #include "util/u_memory.h" static void @@ -36,11 +37,11 @@ radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id v switch (value) { case RADEON_ALLOCATED_VRAM: - return ws->allocated_vram; + return ws->alloc_tracker->allocated_vram; case RADEON_ALLOCATED_VRAM_VIS: - return ws->allocated_vram_vis; + return ws->alloc_tracker->allocated_vram_vis; case RADEON_ALLOCATED_GTT: - return ws->allocated_gtt; + return ws->alloc_tracker->allocated_gtt; case RADEON_TIMESTAMP: ac_drm_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval); return retval; @@ -110,6 +111,61 @@ radv_amdgpu_winsys_query_gpuvm_fault(struct radeon_winsys *rws, struct radv_wins return true; } +static simple_mtx_t tracker_mutex = SIMPLE_MTX_INITIALIZER; +static struct hash_table *alloc_trackers = NULL; + +static struct radv_amdgpu_alloc_tracker * +radv_amdgpu_alloc_tracker_acquire(uintptr_t cookie) +{ + struct radv_amdgpu_alloc_tracker *tracker = NULL; + + simple_mtx_lock(&tracker_mutex); + + if (!alloc_trackers) + alloc_trackers = _mesa_pointer_hash_table_create(NULL); + if (!alloc_trackers) { + simple_mtx_unlock(&tracker_mutex); + return NULL; + } + + struct hash_entry *entry = _mesa_hash_table_search(alloc_trackers, (void *)cookie); + if (entry) { + tracker = entry->data; + tracker->refcount++; + } else { + tracker = calloc(1, sizeof(*tracker)); + if (!tracker) { + simple_mtx_unlock(&tracker_mutex); + return NULL; + } + + tracker->refcount = 1; + tracker->cookie = cookie; /* used for release. */ + _mesa_hash_table_insert(alloc_trackers, (void *)cookie, tracker); + } + + simple_mtx_unlock(&tracker_mutex); + return tracker; +} + +static void +radv_amdgpu_alloc_tracker_release(struct radv_amdgpu_alloc_tracker *tracker) +{ + simple_mtx_lock(&tracker_mutex); + + if (!--tracker->refcount) { + _mesa_hash_table_remove_key(alloc_trackers, (void *)tracker->cookie); + free(tracker); + + if (_mesa_hash_table_num_entries(alloc_trackers) == 0) { + _mesa_hash_table_destroy(alloc_trackers, NULL); + alloc_trackers = NULL; + } + } + + simple_mtx_unlock(&tracker_mutex); +} + static simple_mtx_t winsys_creation_mutex = SIMPLE_MTX_INITIALIZER; static struct hash_table *winsyses = NULL; @@ -177,6 +233,8 @@ radv_amdgpu_winsys_destroy(struct radeon_winsys *rws) radv_amdgpu_null_prt_bug_finish(rws); + radv_amdgpu_alloc_tracker_release(ws->alloc_tracker); + ac_drm_device_deinitialize(ws->dev); FREE(rws); } @@ -293,6 +351,12 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, ws->info.drm_minor = drm_minor; ws->info.is_virtio = is_virtio; + ws->alloc_tracker = radv_amdgpu_alloc_tracker_acquire(ac_drm_device_get_cookie(dev)); + if (!ws->alloc_tracker) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto winsys_fail; + } + enum ac_query_gpu_info_result info_result = ac_query_gpu_info(ws->fd, ws->dev, &ws->info, true, !(debug_flags & RADV_DEBUG_NO_CACHE_COMPAT)); if (info_result != AC_QUERY_GPU_INFO_SUCCESS) { @@ -362,6 +426,8 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, return result; winsys_fail: + if (ws->alloc_tracker) + radv_amdgpu_alloc_tracker_release(ws->alloc_tracker); free(ws); fail: if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) { diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h index 9db76fc9d0d..01bf2c57816 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h @@ -22,6 +22,21 @@ #include "vk_sync.h" #include "vk_sync_timeline.h" +/** + * Process-global per-GPU allocation tracker. + * + * Tracks userspace BO allocation counters across all winsys instances for + * the same GPU within this process. This ensures VK_EXT_memory_budget + * reports correct process-wide usage even with multiple VkInstance objects. + */ +struct radv_amdgpu_alloc_tracker { + uintptr_t cookie; + alignas(8) uint64_t allocated_vram; + alignas(8) uint64_t allocated_vram_vis; + alignas(8) uint64_t allocated_gtt; + uint32_t refcount; +}; + struct radv_amdgpu_winsys { struct radeon_winsys base; ac_drm_device *dev; @@ -38,9 +53,7 @@ struct radv_amdgpu_winsys { bool debug_vm; uint64_t perftest; - alignas(8) uint64_t allocated_vram; - alignas(8) uint64_t allocated_vram_vis; - alignas(8) uint64_t allocated_gtt; + struct radv_amdgpu_alloc_tracker *alloc_tracker; /* Global BO list */ struct {