radv/amdgpu: rework tracking allocated memory for budget

Deduplicating the winsys just for the memory budget looks more like a hack
than a real implementation. Rework how allocated memory is tracked so that
the dedup can be removed.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41805>
This commit is contained in:
Samuel Pitoiset 2026-05-26 16:21:05 +02:00 committed by Marge Bot
parent feb9ac168b
commit a0a9f9dda1
4 changed files with 96 additions and 17 deletions

View file

@ -388,14 +388,14 @@ radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo
if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
if (bo->base.vram_no_cpu_access) {
p_atomic_add(&ws->allocated_vram, -align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_vram, -align64(bo->base.size, ws->info.gart_page_size));
} else {
p_atomic_add(&ws->allocated_vram_vis, -align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_vram_vis, -align64(bo->base.size, ws->info.gart_page_size));
}
}
if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
p_atomic_add(&ws->allocated_gtt, -align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_gtt, -align64(bo->base.size, ws->info.gart_page_size));
ac_drm_va_range_free(bo->va_handle);
FREE(bo);
@ -642,14 +642,14 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned
* visible counter because they can be mapped.
*/
if (bo->base.vram_no_cpu_access) {
p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
} else {
p_atomic_add(&ws->allocated_vram_vis, align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_vram_vis, align64(bo->base.size, ws->info.gart_page_size));
}
}
if (initial_domain & RADEON_DOMAIN_GTT)
p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
if (ws->debug_all_bos)
radv_amdgpu_global_bo_list_add(ws, bo);
@ -817,7 +817,7 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_
bo->cpu_map = NULL;
bo->base.obj_id = (uintptr_t)(buf_handle.abo);
p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
if (ws->debug_all_bos)
radv_amdgpu_global_bo_list_add(ws, bo);
@ -910,9 +910,9 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priori
bo->base.obj_id = (uintptr_t)(result.bo.abo);
if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
if (ws->debug_all_bos)
radv_amdgpu_global_bo_list_add(ws, bo);

View file

@ -203,8 +203,8 @@ radv_amdgpu_cs_domain(const struct radeon_winsys *_ws)
{
const struct radv_amdgpu_winsys *ws = (const struct radv_amdgpu_winsys *)_ws;
bool enough_vram = ws->info.all_vram_visible ||
p_atomic_read_relaxed(&ws->allocated_vram_vis) * 2 <= (uint64_t)ws->info.vram_vis_size_kb * 1024;
bool enough_vram = ws->info.all_vram_visible || p_atomic_read_relaxed(&ws->alloc_tracker->allocated_vram_vis) * 2 <=
(uint64_t)ws->info.vram_vis_size_kb * 1024;
/* Bandwidth should be equivalent to at least PCIe 3.0 x8.
* If there is no PCIe info, assume there is enough bandwidth.

View file

@ -19,6 +19,7 @@
#include "radv_amdgpu_winsys_public.h"
#include "radv_debug.h"
#include "vk_drm_syncobj.h"
#include "util/hash_table.h"
#include "util/u_memory.h"
static void
@ -36,11 +37,11 @@ radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id v
switch (value) {
case RADEON_ALLOCATED_VRAM:
return ws->allocated_vram;
return ws->alloc_tracker->allocated_vram;
case RADEON_ALLOCATED_VRAM_VIS:
return ws->allocated_vram_vis;
return ws->alloc_tracker->allocated_vram_vis;
case RADEON_ALLOCATED_GTT:
return ws->allocated_gtt;
return ws->alloc_tracker->allocated_gtt;
case RADEON_TIMESTAMP:
ac_drm_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
return retval;
@ -110,6 +111,61 @@ radv_amdgpu_winsys_query_gpuvm_fault(struct radeon_winsys *rws, struct radv_wins
return true;
}
/* Held around every lookup, insertion and removal in alloc_trackers and
 * around every change to a tracker's refcount.
 */
static simple_mtx_t tracker_mutex = SIMPLE_MTX_INITIALIZER;
/* Pointer-keyed table mapping a device cookie to its
 * radv_amdgpu_alloc_tracker. Created on the first acquire and destroyed
 * (reset to NULL) when the last tracker is released.
 */
static struct hash_table *alloc_trackers = NULL;
/**
 * Take a reference on the allocation tracker registered under \p cookie,
 * creating and registering a zero-initialized tracker if none exists yet.
 *
 * The tracker table itself is created lazily on first use. All table and
 * refcount manipulation happens with tracker_mutex held.
 *
 * \param cookie  Key identifying the tracker; also stored in the tracker so
 *                that the release path can remove the table entry.
 * \return The tracker with its refcount incremented, or NULL if the table,
 *         the tracker, or its table entry could not be allocated. Each
 *         successful call must be balanced by
 *         radv_amdgpu_alloc_tracker_release().
 */
static struct radv_amdgpu_alloc_tracker *
radv_amdgpu_alloc_tracker_acquire(uintptr_t cookie)
{
   struct radv_amdgpu_alloc_tracker *tracker = NULL;
   struct hash_entry *entry;

   simple_mtx_lock(&tracker_mutex);

   if (!alloc_trackers) {
      alloc_trackers = _mesa_pointer_hash_table_create(NULL);
      if (!alloc_trackers) {
         simple_mtx_unlock(&tracker_mutex);
         return NULL;
      }
   }

   entry = _mesa_hash_table_search(alloc_trackers, (void *)cookie);
   if (entry) {
      /* Tracker already registered for this cookie: share it. */
      tracker = entry->data;
      tracker->refcount++;
   } else {
      tracker = calloc(1, sizeof(*tracker));
      if (tracker) {
         tracker->refcount = 1;
         tracker->cookie = cookie; /* used for release. */

         /* The insertion can fail when the table needs to grow. Returning an
          * unregistered tracker would let a later acquire for the same
          * cookie create a second one and split the counters, so treat it
          * as an allocation failure instead.
          */
         if (!_mesa_hash_table_insert(alloc_trackers, (void *)cookie, tracker)) {
            free(tracker);
            tracker = NULL;
         }
      }

      /* Do not keep an empty table around after a failed first acquire;
       * this mirrors what the release path does.
       */
      if (!tracker && _mesa_hash_table_num_entries(alloc_trackers) == 0) {
         _mesa_hash_table_destroy(alloc_trackers, NULL);
         alloc_trackers = NULL;
      }
   }

   simple_mtx_unlock(&tracker_mutex);
   return tracker;
}
/**
 * Drop one reference on \p tracker.
 *
 * When the last reference goes away the tracker is unregistered from the
 * tracker table (using the cookie stored in it) and freed; if that leaves
 * the table empty, the table is destroyed as well. Everything runs with
 * tracker_mutex held.
 */
static void
radv_amdgpu_alloc_tracker_release(struct radv_amdgpu_alloc_tracker *tracker)
{
   simple_mtx_lock(&tracker_mutex);

   tracker->refcount -= 1;
   if (tracker->refcount != 0) {
      /* Still referenced by somebody else. */
      simple_mtx_unlock(&tracker_mutex);
      return;
   }

   _mesa_hash_table_remove_key(alloc_trackers, (void *)tracker->cookie);
   free(tracker);

   /* Tear the table down once nothing is registered in it anymore. */
   if (_mesa_hash_table_num_entries(alloc_trackers) == 0) {
      _mesa_hash_table_destroy(alloc_trackers, NULL);
      alloc_trackers = NULL;
   }

   simple_mtx_unlock(&tracker_mutex);
}
static simple_mtx_t winsys_creation_mutex = SIMPLE_MTX_INITIALIZER;
static struct hash_table *winsyses = NULL;
@ -177,6 +233,8 @@ radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
radv_amdgpu_null_prt_bug_finish(rws);
radv_amdgpu_alloc_tracker_release(ws->alloc_tracker);
ac_drm_device_deinitialize(ws->dev);
FREE(rws);
}
@ -293,6 +351,12 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
ws->info.drm_minor = drm_minor;
ws->info.is_virtio = is_virtio;
ws->alloc_tracker = radv_amdgpu_alloc_tracker_acquire(ac_drm_device_get_cookie(dev));
if (!ws->alloc_tracker) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto winsys_fail;
}
enum ac_query_gpu_info_result info_result =
ac_query_gpu_info(ws->fd, ws->dev, &ws->info, true, !(debug_flags & RADV_DEBUG_NO_CACHE_COMPAT));
if (info_result != AC_QUERY_GPU_INFO_SUCCESS) {
@ -362,6 +426,8 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
return result;
winsys_fail:
if (ws->alloc_tracker)
radv_amdgpu_alloc_tracker_release(ws->alloc_tracker);
free(ws);
fail:
if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {

View file

@ -22,6 +22,21 @@
#include "vk_sync.h"
#include "vk_sync_timeline.h"
/**
* Process-global per-GPU allocation tracker.
*
* Tracks userspace BO allocation counters across all winsys instances for
* the same GPU within this process. This ensures VK_EXT_memory_budget
* reports correct process-wide usage even with multiple VkInstance objects.
*
* Trackers are handed out by radv_amdgpu_alloc_tracker_acquire() and given
* back with radv_amdgpu_alloc_tracker_release().
*/
struct radv_amdgpu_alloc_tracker {
/* Key this tracker is registered under in the tracker hash table; kept
* here so that the release path can remove the entry.
*/
uintptr_t cookie;
/* Byte counters. The BO create/import/destroy paths adjust them with
* p_atomic_add, using the BO size aligned to gart_page_size.
*
* allocated_vram: VRAM BOs flagged vram_no_cpu_access, plus VRAM BOs
* imported from an fd.
*/
alignas(8) uint64_t allocated_vram;
/* VRAM BOs that are not flagged vram_no_cpu_access. */
alignas(8) uint64_t allocated_vram_vis;
/* BOs in the GTT domain, including BOs created from a user pointer. */
alignas(8) uint64_t allocated_gtt;
/* Number of acquires not yet balanced by a release; only modified with
* tracker_mutex held. The tracker is freed when it drops to zero.
*/
uint32_t refcount;
};
struct radv_amdgpu_winsys {
struct radeon_winsys base;
ac_drm_device *dev;
@ -38,9 +53,7 @@ struct radv_amdgpu_winsys {
bool debug_vm;
uint64_t perftest;
alignas(8) uint64_t allocated_vram;
alignas(8) uint64_t allocated_vram_vis;
alignas(8) uint64_t allocated_gtt;
struct radv_amdgpu_alloc_tracker *alloc_tracker;
/* Global BO list */
struct {