mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-05 23:48:17 +02:00
radv/amdgpu: rework tracking allocated memory for budget
Deduplicating the winsys just for the memory budget looks more like a hack than a real implementation. Rework the tracking of allocated memory so that the dedup can be removed. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41805>
This commit is contained in:
parent
feb9ac168b
commit
a0a9f9dda1
4 changed files with 96 additions and 17 deletions
|
|
@ -388,14 +388,14 @@ radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo
|
|||
|
||||
if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
|
||||
if (bo->base.vram_no_cpu_access) {
|
||||
p_atomic_add(&ws->allocated_vram, -align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_vram, -align64(bo->base.size, ws->info.gart_page_size));
|
||||
} else {
|
||||
p_atomic_add(&ws->allocated_vram_vis, -align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_vram_vis, -align64(bo->base.size, ws->info.gart_page_size));
|
||||
}
|
||||
}
|
||||
|
||||
if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
|
||||
p_atomic_add(&ws->allocated_gtt, -align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_gtt, -align64(bo->base.size, ws->info.gart_page_size));
|
||||
|
||||
ac_drm_va_range_free(bo->va_handle);
|
||||
FREE(bo);
|
||||
|
|
@ -642,14 +642,14 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned
|
|||
* visible counter because they can be mapped.
|
||||
*/
|
||||
if (bo->base.vram_no_cpu_access) {
|
||||
p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
|
||||
} else {
|
||||
p_atomic_add(&ws->allocated_vram_vis, align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_vram_vis, align64(bo->base.size, ws->info.gart_page_size));
|
||||
}
|
||||
}
|
||||
|
||||
if (initial_domain & RADEON_DOMAIN_GTT)
|
||||
p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
|
||||
|
||||
if (ws->debug_all_bos)
|
||||
radv_amdgpu_global_bo_list_add(ws, bo);
|
||||
|
|
@ -817,7 +817,7 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_
|
|||
bo->cpu_map = NULL;
|
||||
bo->base.obj_id = (uintptr_t)(buf_handle.abo);
|
||||
|
||||
p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
|
||||
|
||||
if (ws->debug_all_bos)
|
||||
radv_amdgpu_global_bo_list_add(ws, bo);
|
||||
|
|
@ -910,9 +910,9 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priori
|
|||
bo->base.obj_id = (uintptr_t)(result.bo.abo);
|
||||
|
||||
if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
|
||||
p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
|
||||
if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
|
||||
p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
|
||||
p_atomic_add(&ws->alloc_tracker->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));
|
||||
|
||||
if (ws->debug_all_bos)
|
||||
radv_amdgpu_global_bo_list_add(ws, bo);
|
||||
|
|
|
|||
|
|
@ -203,8 +203,8 @@ radv_amdgpu_cs_domain(const struct radeon_winsys *_ws)
|
|||
{
|
||||
const struct radv_amdgpu_winsys *ws = (const struct radv_amdgpu_winsys *)_ws;
|
||||
|
||||
bool enough_vram = ws->info.all_vram_visible ||
|
||||
p_atomic_read_relaxed(&ws->allocated_vram_vis) * 2 <= (uint64_t)ws->info.vram_vis_size_kb * 1024;
|
||||
bool enough_vram = ws->info.all_vram_visible || p_atomic_read_relaxed(&ws->alloc_tracker->allocated_vram_vis) * 2 <=
|
||||
(uint64_t)ws->info.vram_vis_size_kb * 1024;
|
||||
|
||||
/* Bandwidth should be equivalent to at least PCIe 3.0 x8.
|
||||
* If there is no PCIe info, assume there is enough bandwidth.
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
#include "radv_amdgpu_winsys_public.h"
|
||||
#include "radv_debug.h"
|
||||
#include "vk_drm_syncobj.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
static void
|
||||
|
|
@ -36,11 +37,11 @@ radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id v
|
|||
|
||||
switch (value) {
|
||||
case RADEON_ALLOCATED_VRAM:
|
||||
return ws->allocated_vram;
|
||||
return ws->alloc_tracker->allocated_vram;
|
||||
case RADEON_ALLOCATED_VRAM_VIS:
|
||||
return ws->allocated_vram_vis;
|
||||
return ws->alloc_tracker->allocated_vram_vis;
|
||||
case RADEON_ALLOCATED_GTT:
|
||||
return ws->allocated_gtt;
|
||||
return ws->alloc_tracker->allocated_gtt;
|
||||
case RADEON_TIMESTAMP:
|
||||
ac_drm_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
|
||||
return retval;
|
||||
|
|
@ -110,6 +111,61 @@ radv_amdgpu_winsys_query_gpuvm_fault(struct radeon_winsys *rws, struct radv_wins
|
|||
return true;
|
||||
}
|
||||
|
||||
static simple_mtx_t tracker_mutex = SIMPLE_MTX_INITIALIZER;
|
||||
static struct hash_table *alloc_trackers = NULL;
|
||||
|
||||
static struct radv_amdgpu_alloc_tracker *
|
||||
radv_amdgpu_alloc_tracker_acquire(uintptr_t cookie)
|
||||
{
|
||||
struct radv_amdgpu_alloc_tracker *tracker = NULL;
|
||||
|
||||
simple_mtx_lock(&tracker_mutex);
|
||||
|
||||
if (!alloc_trackers)
|
||||
alloc_trackers = _mesa_pointer_hash_table_create(NULL);
|
||||
if (!alloc_trackers) {
|
||||
simple_mtx_unlock(&tracker_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct hash_entry *entry = _mesa_hash_table_search(alloc_trackers, (void *)cookie);
|
||||
if (entry) {
|
||||
tracker = entry->data;
|
||||
tracker->refcount++;
|
||||
} else {
|
||||
tracker = calloc(1, sizeof(*tracker));
|
||||
if (!tracker) {
|
||||
simple_mtx_unlock(&tracker_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tracker->refcount = 1;
|
||||
tracker->cookie = cookie; /* used for release. */
|
||||
_mesa_hash_table_insert(alloc_trackers, (void *)cookie, tracker);
|
||||
}
|
||||
|
||||
simple_mtx_unlock(&tracker_mutex);
|
||||
return tracker;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_amdgpu_alloc_tracker_release(struct radv_amdgpu_alloc_tracker *tracker)
|
||||
{
|
||||
simple_mtx_lock(&tracker_mutex);
|
||||
|
||||
if (!--tracker->refcount) {
|
||||
_mesa_hash_table_remove_key(alloc_trackers, (void *)tracker->cookie);
|
||||
free(tracker);
|
||||
|
||||
if (_mesa_hash_table_num_entries(alloc_trackers) == 0) {
|
||||
_mesa_hash_table_destroy(alloc_trackers, NULL);
|
||||
alloc_trackers = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
simple_mtx_unlock(&tracker_mutex);
|
||||
}
|
||||
|
||||
static simple_mtx_t winsys_creation_mutex = SIMPLE_MTX_INITIALIZER;
|
||||
static struct hash_table *winsyses = NULL;
|
||||
|
||||
|
|
@ -177,6 +233,8 @@ radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
|
|||
|
||||
radv_amdgpu_null_prt_bug_finish(rws);
|
||||
|
||||
radv_amdgpu_alloc_tracker_release(ws->alloc_tracker);
|
||||
|
||||
ac_drm_device_deinitialize(ws->dev);
|
||||
FREE(rws);
|
||||
}
|
||||
|
|
@ -293,6 +351,12 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
|
|||
ws->info.drm_minor = drm_minor;
|
||||
ws->info.is_virtio = is_virtio;
|
||||
|
||||
ws->alloc_tracker = radv_amdgpu_alloc_tracker_acquire(ac_drm_device_get_cookie(dev));
|
||||
if (!ws->alloc_tracker) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto winsys_fail;
|
||||
}
|
||||
|
||||
enum ac_query_gpu_info_result info_result =
|
||||
ac_query_gpu_info(ws->fd, ws->dev, &ws->info, true, !(debug_flags & RADV_DEBUG_NO_CACHE_COMPAT));
|
||||
if (info_result != AC_QUERY_GPU_INFO_SUCCESS) {
|
||||
|
|
@ -362,6 +426,8 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
|
|||
return result;
|
||||
|
||||
winsys_fail:
|
||||
if (ws->alloc_tracker)
|
||||
radv_amdgpu_alloc_tracker_release(ws->alloc_tracker);
|
||||
free(ws);
|
||||
fail:
|
||||
if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {
|
||||
|
|
|
|||
|
|
@ -22,6 +22,21 @@
|
|||
#include "vk_sync.h"
|
||||
#include "vk_sync_timeline.h"
|
||||
|
||||
/**
|
||||
* Process-global per-GPU allocation tracker.
|
||||
*
|
||||
* Tracks userspace BO allocation counters across all winsys instances for
|
||||
* the same GPU within this process. This ensures VK_EXT_memory_budget
|
||||
* reports correct process-wide usage even with multiple VkInstance objects.
|
||||
*/
|
||||
struct radv_amdgpu_alloc_tracker {
|
||||
/* Key under which this tracker is stored in the global tracker table; kept
 * in the tracker so that release can remove the matching table entry.
 */
uintptr_t cookie;
|
||||
/* Bytes accounted for VRAM buffers flagged vram_no_cpu_access, plus VRAM
 * buffers imported from an fd. Updated with p_atomic_add() using the buffer
 * size aligned to gart_page_size.
 */
alignas(8) uint64_t allocated_vram;
|
||||
/* Bytes accounted for VRAM buffers that are not flagged vram_no_cpu_access.
 * Updated with p_atomic_add() using the buffer size aligned to
 * gart_page_size.
 */
alignas(8) uint64_t allocated_vram_vis;
|
||||
/* Bytes accounted for buffers whose domain includes GTT, including buffers
 * created from a user pointer. Updated with p_atomic_add() using the buffer
 * size aligned to gart_page_size.
 */
alignas(8) uint64_t allocated_gtt;
|
||||
/* Number of outstanding acquires; modified only with tracker_mutex held.
 * The tracker is freed when it drops to zero.
 */
uint32_t refcount;
|
||||
};
|
||||
|
||||
struct radv_amdgpu_winsys {
|
||||
struct radeon_winsys base;
|
||||
ac_drm_device *dev;
|
||||
|
|
@ -38,9 +53,7 @@ struct radv_amdgpu_winsys {
|
|||
bool debug_vm;
|
||||
uint64_t perftest;
|
||||
|
||||
alignas(8) uint64_t allocated_vram;
|
||||
alignas(8) uint64_t allocated_vram_vis;
|
||||
alignas(8) uint64_t allocated_gtt;
|
||||
struct radv_amdgpu_alloc_tracker *alloc_tracker;
|
||||
|
||||
/* Global BO list */
|
||||
struct {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue