An example of when the memory leak happens: requested_size = 4 and
alignment = 65536 in anv_slab_bo_alloc, which gives alloc_size = 65536.

The group used to allocate the entry is the group for size 65536, based
on the entry size, while the group used to reclaim the entry is the
group for size 4, because bo->size is registered as requested_size = 4
and is what anv_slab_bo_free used to look up the group.

That means the entry is allocated from group[order of size 65536]->free,
handed to the user, and then moved to group[order of size 4]->reclaim.
Entries accumulate in group[order of size 4]->reclaim while
group[order of size 65536] keeps allocating new entries, eventually
leading to OOM.

The solution is to use `bo->actual_size` in anv_slab_bo_free, so the
group is looked up with the allocation size instead of the requested
size.
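
A condensed sketch of the mismatch (simplified from anv_slab_bo_alloc
and anv_slab_bo_free; comments show the example values above):

    /* alloc: the slab group comes from the rounded-up entry size */
    uint64_t alloc_size = MAX2(alignment, requested_size);      /* 65536 */
    entry = pb_slab_alloc(get_slabs(device, alloc_size), ...);  /* order(65536) */
    bo->size = requested_size;                                  /* 4 */

    /* free, before the fix: looked up with the requested size */
    pb_slab_free(get_slabs(device, bo->size), &bo->slab_entry);        /* order(4) */

    /* free, after the fix: looked up with the allocation size */
    pb_slab_free(get_slabs(device, bo->actual_size), &bo->slab_entry); /* order(65536) */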
Fixes: dabb012423 ("anv: Implement anv_slab_bo and enable memory pool")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14396
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: hwandy <hwandy@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38989>
/* Copyright © 2025 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_slab_bo.h"

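/* Each slab heap groups entries whose parent BOs share the same
 * anv_bo_alloc_flags, so an allocation is only ever served from a slab
 * whose flags match the request.
 */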
enum anv_bo_slab_heap {
   ANV_BO_SLAB_HEAP_CACHED_COHERENT_CAPTURE, /* main usage is batch buffers but other buffers also match */
   ANV_BO_SLAB_HEAP_DYNAMIC_VISIBLE_POOL,
   ANV_BO_SLAB_HEAP_DESCRIPTOR_POOL,
   ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT,
   ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT,
   ANV_BO_SLAB_HEAP_SMEM_COHERENT,
   ANV_BO_SLAB_HEAP_COMPRESSED, /* used by integrated and discrete GPUs */
   ANV_BO_SLAB_HEAP_LMEM_SMEM,
   ANV_BO_SLAB_HEAP_LMEM_ONLY,
   ANV_BO_SLAB_NOT_SUPPORTED,
};

struct anv_slab {
   struct pb_slab base;

   /** The BO representing the entire slab */
   struct anv_bo *bo;

   /** Array of anv_bo structs representing BOs allocated out of this slab */
   struct anv_bo *entries;
};

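/* Map a set of alloc flags to the slab heap that can serve them, or
 * ANV_BO_SLAB_NOT_SUPPORTED when the flags require a dedicated BO.
 */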
static enum anv_bo_slab_heap
anv_bo_alloc_flags_to_slab_heap(struct anv_device *device,
                                enum anv_bo_alloc_flags alloc_flags)
{
   enum anv_bo_alloc_flags not_supported = ANV_BO_ALLOC_32BIT_ADDRESS |
                                           ANV_BO_ALLOC_EXTERNAL |
                                           ANV_BO_ALLOC_CAPTURE |
                                           ANV_BO_ALLOC_FIXED_ADDRESS |
                                           ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS |
                                           ANV_BO_ALLOC_DESCRIPTOR_POOL |
                                           ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
                                           ANV_BO_ALLOC_SCANOUT |
                                           ANV_BO_ALLOC_PROTECTED |
                                           ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL |
                                           ANV_BO_ALLOC_IMPORTED |
                                           ANV_BO_ALLOC_SLAB_PARENT;

   if (device->info->kmd_type == INTEL_KMD_TYPE_I915) {
      not_supported |= (ANV_BO_ALLOC_IMPLICIT_SYNC |
                        ANV_BO_ALLOC_IMPLICIT_WRITE);
   }

   if (alloc_flags == ANV_BO_ALLOC_BATCH_BUFFER_FLAGS ||
       alloc_flags == ANV_BO_ALLOC_BATCH_BUFFER_INTERNAL_FLAGS)
      return ANV_BO_SLAB_HEAP_CACHED_COHERENT_CAPTURE;

   if (alloc_flags == ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL_FLAGS)
      return ANV_BO_SLAB_HEAP_DYNAMIC_VISIBLE_POOL;

   if (alloc_flags == ANV_BO_ALLOC_DESCRIPTOR_POOL_FLAGS)
      return ANV_BO_SLAB_HEAP_DESCRIPTOR_POOL;

   if (alloc_flags & not_supported)
      return ANV_BO_SLAB_NOT_SUPPORTED;

   if (alloc_flags & ANV_BO_ALLOC_COMPRESSED)
      return ANV_BO_SLAB_HEAP_COMPRESSED;

   if (anv_physical_device_has_vram(device->physical)) {
      if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
         return ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT;
      if (alloc_flags & (ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE))
         return ANV_BO_SLAB_HEAP_LMEM_SMEM;
      return ANV_BO_SLAB_HEAP_LMEM_ONLY;
   }

   if ((alloc_flags & ANV_BO_ALLOC_HOST_CACHED_COHERENT) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
      return ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT;
   if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
      return ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT;
   return ANV_BO_SLAB_HEAP_SMEM_COHERENT;
}

/* Return the power of two size of a slab entry matching the input size. */
static unsigned
get_slab_pot_entry_size(struct anv_device *device, unsigned size)
{
   unsigned entry_size = util_next_power_of_two(size);
   unsigned min_entry_size = 1 << device->bo_slabs[0].min_order;

   return MAX2(entry_size, min_entry_size);
}

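/* Pick the slab allocator whose order range covers the given size; each
 * allocator in device->bo_slabs handles a contiguous range of power-of-two
 * entry sizes.
 */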
static struct pb_slabs *
get_slabs(struct anv_device *device, uint64_t size)
{
   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);

   for (unsigned i = 0; i < num_slab_allocator; i++) {
      struct pb_slabs *slabs = &device->bo_slabs[i];

      if (size <= (1ull << (slabs->min_order + slabs->num_orders - 1)))
         return slabs;
   }

   UNREACHABLE("should have found a valid slab for this size");
   return NULL;
}

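/* Slab allocation is disabled when anv_slab_bo_init() never initialized the
 * allocators (num_heaps is left at 0).
 */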
static inline bool
anv_slab_bo_is_disabled(struct anv_device *device)
{
   return device->bo_slabs[0].num_heaps == 0;
}

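/* Try to allocate a BO as a slab entry. Returns NULL when the request cannot
 * be served from a slab (allocator disabled, unsupported flags, or size above
 * the largest slab entry), in which case the caller is expected to fall back
 * to a dedicated BO allocation.
 */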
struct anv_bo *
anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t requested_size,
                  uint32_t alignment, enum anv_bo_alloc_flags alloc_flags)
{
   if (anv_slab_bo_is_disabled(device))
      return NULL;

   const enum anv_bo_slab_heap slab_heap = anv_bo_alloc_flags_to_slab_heap(device, alloc_flags);
   if (slab_heap == ANV_BO_SLAB_NOT_SUPPORTED)
      return NULL;

   /* Don't use slabs for small allocations when AUX_TT_ALIGNED is required
    * and the AUX alignment is >= 1MB: rounding every small entry up to the
    * AUX alignment causes high memory consumption, which leads to OOM when
    * several GPU applications run in parallel.
    */
   if ((alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) &&
       (intel_aux_map_get_alignment(device->aux_map_ctx) >= 1024 * 1024) &&
       (requested_size < (1024 * 1024 / 2)))
      return NULL;

   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
   struct pb_slabs *last_slab = &device->bo_slabs[num_slab_allocator - 1];
   const uint64_t max_slab_entry_size = BITFIELD64_BIT(last_slab->min_order + last_slab->num_orders - 1);

   if (requested_size > max_slab_entry_size)
      return NULL;

   uint64_t alloc_size = MAX2(alignment, requested_size);
   alloc_size = get_slab_pot_entry_size(device, alloc_size);

   if (alloc_size > max_slab_entry_size)
      return NULL;

   struct pb_slabs *slabs = get_slabs(device, alloc_size);
   struct pb_slab_entry *entry = pb_slab_alloc(slabs, alloc_size, slab_heap);
   if (!entry) {
      /* Clean up and try again... */
      pb_slabs_reclaim(slabs);

      entry = pb_slab_alloc(slabs, alloc_size, slab_heap);
   }
   if (!entry)
      return NULL;

   struct anv_bo *bo = container_of(entry, struct anv_bo, slab_entry);
   bo->name = name;
   bo->refcount = 1;
   bo->size = requested_size;
   bo->alloc_flags = alloc_flags;
   bo->flags = device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags);

   assert(bo->flags == bo->slab_parent->flags);
   assert((intel_48b_address(bo->offset) & (alignment - 1)) == 0);

   if (alloc_flags & ANV_BO_ALLOC_MAPPED) {
      if (anv_device_map_bo(device, bo, 0, bo->size, NULL, &bo->map) != VK_SUCCESS) {
         anv_slab_bo_free(device, bo);
         return NULL;
      }
   }

   return bo;
}

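/* Return a slab entry to its slab. Note the allocator is looked up with
 * bo->actual_size (the power-of-two entry size), not bo->size, which holds
 * the caller's requested size and may map to a different slab group.
 */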
void
anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo)
{
   assert(bo->slab_parent);

   if (bo->map) {
      anv_device_unmap_bo(device, bo, bo->map, bo->size, false /* replace */);
      bo->map = NULL;
   }

   bo->refcount = 0;
   pb_slab_free(get_slabs(device, bo->actual_size), &bo->slab_entry);
}

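/* Number of slab heaps in use on this device: the highest reachable
 * anv_bo_slab_heap value plus one.
 */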
static unsigned heap_max_get(struct anv_device *device)
{
   unsigned ret;

   if (anv_physical_device_has_vram(device->physical))
      ret = ANV_BO_SLAB_HEAP_LMEM_ONLY;
   else
      ret = device->info->verx10 >= 200 ? ANV_BO_SLAB_HEAP_COMPRESSED :
                                          ANV_BO_SLAB_HEAP_SMEM_COHERENT;

   return (ret + 1);
}

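/* pb_slabs callback: an entry can be reclaimed once no one holds a reference
 * to it anymore.
 */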
static bool
anv_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct anv_bo *bo = container_of(entry, struct anv_bo, slab_entry);

   return p_atomic_read(&bo->refcount) == 0;
}

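/* pb_slabs callback: allocate a new slab parent BO for the given heap and
 * carve it into equally sized child BOs.
 */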
static struct pb_slab *
anv_slab_alloc(void *priv,
               unsigned heap,
               unsigned entry_size,
               unsigned group_index)
{
   struct anv_device *device = priv;
   struct anv_slab *slab = calloc(1, sizeof(struct anv_slab));

   if (!slab)
      return NULL;

   const enum anv_bo_slab_heap bo_slab_heap = heap;
   enum anv_bo_alloc_flags alloc_flags = ANV_BO_ALLOC_SLAB_PARENT;

   switch (bo_slab_heap) {
   case ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT:
      alloc_flags |= ANV_BO_ALLOC_HOST_CACHED_COHERENT |
                     ANV_BO_ALLOC_NO_LOCAL_MEM;
      break;
   case ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT:
      alloc_flags |= ANV_BO_ALLOC_HOST_CACHED |
                     ANV_BO_ALLOC_NO_LOCAL_MEM;
      break;
   case ANV_BO_SLAB_HEAP_SMEM_COHERENT:
      alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT |
                     ANV_BO_ALLOC_NO_LOCAL_MEM;
      break;
   case ANV_BO_SLAB_HEAP_COMPRESSED:
      alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
      break;
   case ANV_BO_SLAB_HEAP_LMEM_SMEM:
      alloc_flags |= ANV_BO_ALLOC_MAPPED |
                     ANV_BO_ALLOC_HOST_COHERENT;
      break;
   case ANV_BO_SLAB_HEAP_LMEM_ONLY:
      break;
   case ANV_BO_SLAB_HEAP_CACHED_COHERENT_CAPTURE:
      alloc_flags |= ANV_BO_ALLOC_BATCH_BUFFER_FLAGS;
      break;
   case ANV_BO_SLAB_HEAP_DYNAMIC_VISIBLE_POOL:
      alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL_FLAGS;
      break;
   case ANV_BO_SLAB_HEAP_DESCRIPTOR_POOL:
      alloc_flags |= ANV_BO_ALLOC_DESCRIPTOR_POOL_FLAGS;
      break;
   default:
      UNREACHABLE("Missing");
      return NULL;
   }

   struct pb_slabs *slabs = get_slabs(device, entry_size);

   entry_size = MAX2(entry_size, 1ULL << slabs->min_order);
   if (!util_is_power_of_two_nonzero(entry_size))
      entry_size = util_next_power_of_two(entry_size);

   unsigned slab_parent_size = entry_size * 8;
   /* Allocate at least a 2MB buffer; this allows the KMD to enable THP for
    * this BO.
    */
   slab_parent_size = MAX2(slab_parent_size, 2 * 1024 * 1024);

   VkResult result;
   result = anv_device_alloc_bo(device, "slab_parent", slab_parent_size, alloc_flags,
                                0, &slab->bo);
   if (result != VK_SUCCESS)
      goto error_alloc_bo;

   slab_parent_size = slab->bo->size = slab->bo->actual_size;
   slab->base.num_entries = slab_parent_size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->base.group_index = group_index;
   slab->base.entry_size = entry_size;
   slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto error_alloc_entries;

   list_inithead(&slab->base.free);

   for (unsigned i = 0; i < slab->base.num_entries; i++) {
      struct anv_bo *bo = &slab->entries[i];
      uint64_t offset = intel_48b_address(slab->bo->offset);

      offset += ((uint64_t)i * entry_size);

      bo->name = "slab_child";
      bo->gem_handle = slab->bo->gem_handle;
      bo->refcount = 0;
      bo->offset = intel_canonical_address(offset);
      bo->size = entry_size;
      bo->actual_size = entry_size;
      bo->alloc_flags = alloc_flags;
      bo->vma_heap = slab->bo->vma_heap;
      bo->slab_parent = slab->bo;
      bo->slab_entry.slab = &slab->base;

      list_addtail(&bo->slab_entry.head, &slab->base.free);
   }

   return &slab->base;

error_alloc_entries:
   anv_device_release_bo(device, slab->bo);
error_alloc_bo:
   free(slab);

   return NULL;
}

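/* pb_slabs callback: destroy a slab, releasing its parent BO and the entry
 * array.
 */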
static void
anv_slab_free(void *priv, struct pb_slab *pslab)
{
   struct anv_device *device = priv;
   struct anv_slab *slab = (void *)pslab;

   anv_device_release_bo(device, slab->bo);

   free(slab->entries);
   free(slab);
}

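/* Set up the slab allocators, dividing the supported range of power-of-two
 * entry sizes (256B up to 1MB) among the allocators in device->bo_slabs.
 * Returns true on success, and also when slab allocation is intentionally
 * left disabled.
 */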
bool
anv_slab_bo_init(struct anv_device *device)
{
   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
   unsigned min_slab_order = 8; /* 256 bytes */
   const unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                            num_slab_allocator;

   if (unlikely(device->physical->instance->debug & ANV_DEBUG_NO_SLAB))
      return true;

   /* feature requirement */
   if (!device->info->has_mmap_offset || !device->info->has_partial_mmap_offset)
      return true;

   /* Divide the size order range among slab managers. */
   for (unsigned i = 0; i < num_slab_allocator; i++) {
      const unsigned min_order = min_slab_order;
      const unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
                                      max_slab_order);

      if (!pb_slabs_init(&device->bo_slabs[i], min_order, max_order,
                         heap_max_get(device), false, device,
                         anv_can_reclaim_slab,
                         anv_slab_alloc,
                         anv_slab_free)) {
         goto error_slabs_init;
      }
      min_slab_order = max_order + 1;
   }

   return true;

error_slabs_init:
   for (unsigned i = 0; i < num_slab_allocator; i++) {
      if (!device->bo_slabs[i].groups)
         break;

      pb_slabs_deinit(&device->bo_slabs[i]);
   }

   return false;
}

void
anv_slab_bo_deinit(struct anv_device *device)
{
   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);

   if (anv_slab_bo_is_disabled(device))
      return;

   for (unsigned i = 0; i < num_slab_allocator; i++) {
      if (device->bo_slabs[i].groups)
         pb_slabs_deinit(&device->bo_slabs[i]);
   }
}