anv: Implement anv_slab_bo and enable memory pool

This implements the functions in anv_slab_bo and actually enables the
memory pool. It is heavily based on the Iris memory pool
implementation; the main difference is that the concept of heaps only
exists inside anv_slab_bo, which has a function that takes the
anv_bo_alloc_flags and decides in which heap to place the bo.
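
For illustration, on an integrated GPU (no VRAM) that mapping behaves
like the snippet below. The helper ends up static in anv_slab_bo.c, so
this is only a sketch of its behaviour, not a public API:

    /* A mappable, host cached+coherent allocation lands in the
     * cached+coherent system-memory slab heap. */
    enum anv_bo_slab_heap heap =
       anv_bo_alloc_flags_to_slab_heap(device,
                                       ANV_BO_ALLOC_MAPPED |
                                       ANV_BO_ALLOC_HOST_CACHED_COHERENT);
    /* heap == ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT */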

Some anv_bo_alloc_flags block the memory pool; we can relax and remove
some flags from this denied list later.

This feature can be disabled at runtime with the new no-slab debug
option (ANV_DEBUG_NO_SLAB), which makes it easy to check whether a bug
is caused by this feature or not.
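
The runtime switch works roughly like the sketch below; slab_pool_usable()
is a made-up name for illustration, the real checks live in
anv_slab_bo_init() and anv_slab_bo_alloc()/anv_slab_bo_is_disabled():

    static bool
    slab_pool_usable(struct anv_device *device)
    {
       /* Slabs disabled at runtime via the no-slab debug flag:
        * initialization is skipped entirely. */
       if (device->physical->instance->debug & ANV_DEBUG_NO_SLAB)
          return false;
       /* When init was skipped, no pb_slabs heaps exist and every
        * allocation takes the regular BO path. */
       return device->bo_slabs[0].num_heaps != 0;
    }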

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33558>
José Roberto de Souza, 2025-02-07 05:15:09 -08:00, committed by Marge Bot
parent 3bf6d42fda
commit dabb012423
4 changed files with 344 additions and 3 deletions


@@ -1613,7 +1613,8 @@ anv_device_alloc_bo(struct anv_device *device,
    /* In platforms with LLC we can promote all bos to cached+coherent for free */
    const enum anv_bo_alloc_flags not_allowed_promotion = ANV_BO_ALLOC_SCANOUT |
                                                          ANV_BO_ALLOC_EXTERNAL |
-                                                         ANV_BO_ALLOC_PROTECTED;
+                                                         ANV_BO_ALLOC_PROTECTED |
+                                                         ANV_BO_ALLOC_SLAB_PARENT;

    if (device->info->has_llc && ((alloc_flags & not_allowed_promotion) == 0))
       alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
@@ -1683,7 +1684,8 @@ anv_device_alloc_bo(struct anv_device *device,
       .alloc_flags = alloc_flags,
    };

-   if (alloc_flags & ANV_BO_ALLOC_MAPPED) {
+   if ((alloc_flags & ANV_BO_ALLOC_MAPPED) &&
+       ((alloc_flags & ANV_BO_ALLOC_SLAB_PARENT) == 0)) {
       VkResult result = anv_device_map_bo(device, &new_bo, 0, size,
                                           NULL, &new_bo.map);
       if (unlikely(result != VK_SUCCESS)) {


@@ -79,6 +79,7 @@ static const struct debug_control debug_control[] = {
    { "video-decode", ANV_DEBUG_VIDEO_DECODE},
    { "video-encode", ANV_DEBUG_VIDEO_ENCODE},
    { "shader-hash", ANV_DEBUG_SHADER_HASH},
+   { "no-slab", ANV_DEBUG_NO_SLAB},
    { NULL, 0 }
 };


@@ -456,6 +456,9 @@ enum anv_bo_alloc_flags {
    /** Compressed buffer, only supported in Xe2+ */
    ANV_BO_ALLOC_COMPRESSED = (1 << 21),
+
+   /** Specifies that this bo is a slab parent */
+   ANV_BO_ALLOC_SLAB_PARENT = (1 << 22),
 };

 /** Specifies that the BO should be cached and coherent. */
@@ -1312,6 +1315,7 @@ enum anv_debug {
    ANV_DEBUG_VIDEO_DECODE = BITFIELD_BIT(5),
    ANV_DEBUG_VIDEO_ENCODE = BITFIELD_BIT(6),
    ANV_DEBUG_SHADER_HASH = BITFIELD_BIT(7),
+   ANV_DEBUG_NO_SLAB = BITFIELD_BIT(8),
 };

 struct anv_instance {
@@ -2420,6 +2424,8 @@ void anv_vma_free(struct anv_device *device,
 static inline bool
 anv_bo_is_small_heap(enum anv_bo_alloc_flags alloc_flags)
 {
+   if (alloc_flags & ANV_BO_ALLOC_SLAB_PARENT)
+      return false;
    return alloc_flags & (ANV_BO_ALLOC_DESCRIPTOR_POOL |
                          ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL |
                          ANV_BO_ALLOC_32BIT_ADDRESS);


@@ -4,25 +4,357 @@
#include "anv_slab_bo.h"
enum anv_bo_slab_heap {
ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT,
ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT,
ANV_BO_SLAB_HEAP_SMEM_COHERENT,
ANV_BO_SLAB_HEAP_COMPRESSED, /* used by integrated and discrete GPUs */
ANV_BO_SLAB_HEAP_LMEM_SMEM,
ANV_BO_SLAB_HEAP_LMEM_ONLY,
ANV_BO_SLAB_NOT_SUPPORTED,
};
struct anv_slab {
struct pb_slab base;
/** The BO representing the entire slab */
struct anv_bo *bo;
/** Array of anv_bo structs representing BOs allocated out of this slab */
struct anv_bo *entries;
};
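/* Decide which slab heap a BO with the given alloc flags belongs to, or
 * ANV_BO_SLAB_NOT_SUPPORTED if such a BO must not be sub-allocated from a
 * slab. */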
static enum anv_bo_slab_heap
anv_bo_alloc_flags_to_slab_heap(struct anv_device *device,
enum anv_bo_alloc_flags alloc_flags)
{
enum anv_bo_alloc_flags not_supported = ANV_BO_ALLOC_32BIT_ADDRESS |
ANV_BO_ALLOC_EXTERNAL |
ANV_BO_ALLOC_CAPTURE |
ANV_BO_ALLOC_FIXED_ADDRESS |
ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS |
ANV_BO_ALLOC_DESCRIPTOR_POOL |
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
ANV_BO_ALLOC_SCANOUT |
ANV_BO_ALLOC_PROTECTED |
ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL |
ANV_BO_ALLOC_IMPORTED |
ANV_BO_ALLOC_AUX_CCS |
ANV_BO_ALLOC_SLAB_PARENT;
if (device->info->kmd_type == INTEL_KMD_TYPE_I915) {
not_supported |= (ANV_BO_ALLOC_IMPLICIT_SYNC |
ANV_BO_ALLOC_IMPLICIT_WRITE);
}
if (alloc_flags & not_supported)
return ANV_BO_SLAB_NOT_SUPPORTED;
if (alloc_flags & ANV_BO_ALLOC_COMPRESSED)
return ANV_BO_SLAB_HEAP_COMPRESSED;
if (anv_physical_device_has_vram(device->physical)) {
if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
return ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT;
if (alloc_flags & (ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE))
return ANV_BO_SLAB_HEAP_LMEM_SMEM;
return ANV_BO_SLAB_HEAP_LMEM_ONLY;
}
if ((alloc_flags & ANV_BO_ALLOC_HOST_CACHED_COHERENT) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
return ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT;
if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
return ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT;
return ANV_BO_SLAB_HEAP_SMEM_COHERENT;
}
/* Return the power of two size of a slab entry matching the input size. */
static unsigned
get_slab_pot_entry_size(struct anv_device *device, unsigned size)
{
unsigned entry_size = util_next_power_of_two(size);
unsigned min_entry_size = 1 << device->bo_slabs[0].min_order;
return MAX2(entry_size, min_entry_size);
}
static struct pb_slabs *
get_slabs(struct anv_device *device, uint64_t size)
{
const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
for (unsigned i = 0; i < num_slab_allocator; i++) {
struct pb_slabs *slabs = &device->bo_slabs[i];
if (size <= (1ull << (slabs->min_order + slabs->num_orders - 1)))
return slabs;
}
unreachable("should have found a valid slab for this size");
return NULL;
}
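/* Sizing example for the helpers above: a 5000-byte request rounds up to
 * util_next_power_of_two(5000) == 8192 and is served by the pb_slabs
 * allocator whose order range includes 2^13. Entry sizes range from 256
 * bytes (min_order 8, set in anv_slab_bo_init() below) up to 1 MiB; larger
 * requests are not slab-allocated and anv_slab_bo_alloc() returns NULL. */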
static inline bool
anv_slab_bo_is_disabled(struct anv_device *device)
{
return device->bo_slabs[0].num_heaps == 0;
}
struct anv_bo *
anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t requested_size,
                  uint32_t alignment, enum anv_bo_alloc_flags alloc_flags)
{
if (anv_slab_bo_is_disabled(device))
return NULL;
const enum anv_bo_slab_heap slab_heap = anv_bo_alloc_flags_to_slab_heap(device, alloc_flags);
if (slab_heap == ANV_BO_SLAB_NOT_SUPPORTED)
return NULL;
const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
struct pb_slabs *last_slab = &device->bo_slabs[num_slab_allocator - 1];
const uint64_t max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);
if (requested_size > max_slab_entry_size)
return NULL;
uint64_t alloc_size = MAX2(alignment, requested_size);
alloc_size = get_slab_pot_entry_size(device, alloc_size);
if (alloc_size > max_slab_entry_size)
return NULL;
struct pb_slabs *slabs = get_slabs(device, alloc_size);
struct pb_slab_entry *entry = pb_slab_alloc(slabs, alloc_size, slab_heap);
if (!entry) {
/* Clean up and try again... */
pb_slabs_reclaim(slabs);
entry = pb_slab_alloc(slabs, alloc_size, slab_heap);
}
if (!entry)
return NULL;
struct anv_bo *bo = container_of(entry, struct anv_bo, slab_entry);
bo->name = name;
bo->refcount = 1;
bo->size = requested_size;
bo->alloc_flags = alloc_flags;
bo->flags = device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags);
assert(bo->flags == bo->slab_parent->flags);
assert((intel_48b_address(bo->offset) & (alignment - 1)) == 0);
if (alloc_flags & ANV_BO_ALLOC_MAPPED) {
if (anv_device_map_bo(device, bo, 0, bo->size, NULL, &bo->map) != VK_SUCCESS) {
anv_slab_bo_free(device, bo);
return NULL;
}
}
return bo;
}
void
anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo)
{
assert(bo->slab_parent);
if (bo->map) {
anv_device_unmap_bo(device, bo, bo->map, bo->size, false /* replace */);
bo->map = NULL;
}
bo->refcount = 0;
pb_slab_free(get_slabs(device, bo->size), &bo->slab_entry);
}
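/* Number of slab heaps usable on this device; pb_slabs_init() takes a heap
 * count, so this is the highest available anv_bo_slab_heap value plus one. */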
static unsigned heap_max_get(struct anv_device *device)
{
unsigned ret;
if (anv_physical_device_has_vram(device->physical))
ret = ANV_BO_SLAB_HEAP_LMEM_ONLY;
else
ret = device->info->verx10 >= 200 ? ANV_BO_SLAB_HEAP_COMPRESSED :
ANV_BO_SLAB_HEAP_SMEM_COHERENT;
return (ret + 1);
}
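/* pb_slabs "can_reclaim" callback: a slab entry can be recycled once
 * anv_slab_bo_free() has dropped its refcount to zero. */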
static bool
anv_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
struct anv_bo *bo = container_of(entry, struct anv_bo, slab_entry);
return p_atomic_read(&bo->refcount) == 0;
}
static struct pb_slab *
anv_slab_alloc(void *priv,
unsigned heap,
unsigned entry_size,
unsigned group_index)
{
struct anv_device *device = priv;
struct anv_slab *slab = calloc(1, sizeof(struct anv_slab));
if (!slab)
return NULL;
const enum anv_bo_slab_heap bo_slab_heap = heap;
enum anv_bo_alloc_flags alloc_flags = ANV_BO_ALLOC_SLAB_PARENT;
switch (bo_slab_heap) {
case ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT:
alloc_flags |= ANV_BO_ALLOC_HOST_CACHED_COHERENT |
ANV_BO_ALLOC_NO_LOCAL_MEM;
break;
case ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT:
alloc_flags |= ANV_BO_ALLOC_HOST_CACHED |
ANV_BO_ALLOC_NO_LOCAL_MEM;
break;
case ANV_BO_SLAB_HEAP_SMEM_COHERENT:
alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT |
ANV_BO_ALLOC_NO_LOCAL_MEM;
break;
case ANV_BO_SLAB_HEAP_COMPRESSED:
alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
break;
case ANV_BO_SLAB_HEAP_LMEM_SMEM:
alloc_flags |= ANV_BO_ALLOC_MAPPED;
break;
case ANV_BO_SLAB_HEAP_LMEM_ONLY:
break;
default:
unreachable("Missing");
return NULL;
}
struct pb_slabs *slabs = get_slabs(device, entry_size);
entry_size = MAX2(entry_size, 1ULL << slabs->min_order);
if (!util_is_power_of_two_nonzero(entry_size))
entry_size = util_next_power_of_two(entry_size);
unsigned slab_parent_size = entry_size * 8;
/* allocate at least a 2MB buffer, this allows KMD to enable THP for this bo */
slab_parent_size = MAX2(slab_parent_size, 2 * 1024 * 1024);
VkResult result;
result = anv_device_alloc_bo(device, "slab_parent", slab_parent_size, alloc_flags,
0, &slab->bo);
if (result != VK_SUCCESS)
goto error_alloc_bo;
slab_parent_size = slab->bo->size = slab->bo->actual_size;
slab->base.num_entries = slab_parent_size / entry_size;
slab->base.num_free = slab->base.num_entries;
slab->base.group_index = group_index;
slab->base.entry_size = entry_size;
slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
if (!slab->entries)
goto error_alloc_entries;
list_inithead(&slab->base.free);
for (unsigned i = 0; i < slab->base.num_entries; i++) {
struct anv_bo *bo = &slab->entries[i];
uint64_t offset = intel_48b_address(slab->bo->offset);
offset += (i * entry_size);
bo->name = "slab_child";
bo->gem_handle = slab->bo->gem_handle;
bo->refcount = 0;
bo->offset = intel_canonical_address(offset);
bo->size = entry_size;
bo->actual_size = entry_size;
bo->alloc_flags = alloc_flags;
bo->vma_heap = slab->bo->vma_heap;
bo->slab_parent = slab->bo;
bo->slab_entry.slab = &slab->base;
list_addtail(&bo->slab_entry.head, &slab->base.free);
}
return &slab->base;
error_alloc_entries:
anv_device_release_bo(device, slab->bo);
error_alloc_bo:
free(slab);
return NULL;
}
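/* pb_slabs "slab_free" callback: called when pb_slabs destroys a slab
 * (idle reclaim or deinit); releases the parent BO and the child entries. */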
static void
anv_slab_free(void *priv, struct pb_slab *pslab)
{
struct anv_device *device = priv;
struct anv_slab *slab = (void *)pslab;
anv_device_release_bo(device, slab->bo);
free(slab->entries);
free(slab);
}
bool
anv_slab_bo_init(struct anv_device *device)
{
const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
unsigned min_slab_order = 8;/* 256 bytes */
const unsigned max_slab_order = 20;/* 1 MB (slab size = 2 MB) */
unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
num_slab_allocator;
if (unlikely(device->physical->instance->debug & ANV_DEBUG_NO_SLAB))
return true;
/* feature requirement */
if (!device->info->has_mmap_offset || !device->info->has_partial_mmap_offset)
return true;
/* Divide the size order range among slab managers. */
for (unsigned i = 0; i < num_slab_allocator; i++) {
const unsigned min_order = min_slab_order;
const unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
max_slab_order);
if (!pb_slabs_init(&device->bo_slabs[i], min_order, max_order,
heap_max_get(device), false, device,
anv_can_reclaim_slab,
anv_slab_alloc,
anv_slab_free)) {
goto error_slabs_init;
}
min_slab_order = max_order + 1;
}
return true;
error_slabs_init:
for (unsigned i = 0; i < num_slab_allocator; i++) {
if (!device->bo_slabs[i].groups)
break;
pb_slabs_deinit(&device->bo_slabs[i]);
}
return false;
}
void
anv_slab_bo_deinit(struct anv_device *device)
{
const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
if (anv_slab_bo_is_disabled(device))
return;
for (int i = 0; i < num_slab_allocator; i++) {
if (device->bo_slabs[i].groups)
pb_slabs_deinit(&device->bo_slabs[i]);
}
}