anv: Use vma_heap for descriptor pool host allocation
Pre-patch, anv_descriptor_pool used a free list for host allocations
that never merged adjacent free blocks. If the pool only made
fixed-size allocations, this would not be a problem. But pool
allocations are variable-sized, so in some workloads over half of the
pool's memory ended up consumed by unusable free blocks, needlessly
inflating the memory footprint.
Replacing the free list with util_vma_heap, which does merge adjacent
free blocks, fixes the memory explosion in the target workload.
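
To make the failure mode concrete, here is a minimal standalone sketch of
the util_vma_heap behavior this change relies on. It uses Mesa's real
util_vma_heap API from src/util/vma.h; the sizes and the base offset are
illustrative only, not values from this patch:

#include <assert.h>
#include "util/vma.h"

int main(void)
{
   struct util_vma_heap heap;

   /* Manage a 96-byte range starting at offset 1; the heap returns 0 to
    * signal allocation failure, so the range must not start at 0. */
   util_vma_heap_init(&heap, 1, 96);

   uint64_t a = util_vma_heap_alloc(&heap, 64, 1);
   uint64_t b = util_vma_heap_alloc(&heap, 32, 1);
   assert(a != 0 && b != 0);

   /* Freeing both allocations merges them back into one 96-byte hole... */
   util_vma_heap_free(&heap, a, 64);
   util_vma_heap_free(&heap, b, 32);

   /* ...so a full-size allocation succeeds again. A free list that never
    * merges adjacent blocks would fail here: it would hold a 64-byte and
    * a 32-byte block, neither large enough for 96 bytes. */
   uint64_t c = util_vma_heap_alloc(&heap, 96, 1);
   assert(c != 0);

   util_vma_heap_free(&heap, c, 96);
   util_vma_heap_finish(&heap);
   return 0;
}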
Disadvantages of util_vma_heap compared to the free list:
- The heap calls malloc() when a new hole is created.
- The heap calls free() when a hole disappears or is merged with an
adjacent hole.
- The Vulkan spec expects descriptor set creation/destruction to be
  lockless and thread-local in the common case. For workloads that
  create and destroy sets at high frequency, the added malloc/free
  traffic may cause overhead. Profiling is needed.
Tested with a ChromeOS-internal TensorFlow benchmark, provided by the
'tensorflow' package, running with its OpenCL backend on clvk.

cmdline: benchmark_model --graph=mn2.tflite --use_gpu=true --min_secs=60
gpu: adl

Memory footprint from start of benchmark:
  before: init=132.691MB max=227.684MB
  after:  init=134.988MB max=134.988MB
Reported-by: Romaric Jodin <rjodin@google.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20289>
parent 94a6384f1b
commit a5f9e59ce3

2 changed files with 37 additions and 49 deletions
@@ -806,13 +806,13 @@ void anv_DestroyPipelineLayout(
 /*
  * Descriptor pools.
  *
- * These are implemented using a big pool of memory and a free-list for the
+ * These are implemented using a big pool of memory and a vma heap for the
  * host memory allocations and a state_stream and a free list for the buffer
  * view surface state. The spec allows us to fail to allocate due to
  * fragmentation in all cases but two: 1) after pool reset, allocating up
  * until the pool size with no freeing must succeed and 2) allocating and
  * freeing only descriptor sets with the same layout. Case 1) is easy enough,
- * and the free lists lets us recycle blocks for case 2).
+ * and the vma heap ensures case 2).
  */
 
 /* The vma heap reserves 0 to mean NULL; we have to offset by some amount to
@@ -897,25 +897,26 @@ VkResult anv_CreateDescriptorPool(
 
    const bool host_only =
       pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT;
 
    /* For host_only pools, allocate some memory to hold the written surface
     * states of the internal anv_buffer_view. With normal pools, the memory
     * holding surface state is allocated from the device surface_state_pool.
     */
-   const size_t pool_size =
+   const size_t host_mem_size =
       pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
       descriptor_count * sizeof(struct anv_descriptor) +
       buffer_view_count * sizeof(struct anv_buffer_view) +
       (host_only ? buffer_view_count * ANV_SURFACE_STATE_SIZE : 0);
-   const size_t total_size = sizeof(*pool) + pool_size;
 
-   pool = vk_object_alloc(&device->vk, pAllocator, total_size,
+   pool = vk_object_alloc(&device->vk, pAllocator,
+                          sizeof(*pool) + host_mem_size,
                           VK_OBJECT_TYPE_DESCRIPTOR_POOL);
    if (!pool)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   pool->size = pool_size;
-   pool->next = 0;
-   pool->free_list = EMPTY;
+   pool->host_mem_size = host_mem_size;
+   util_vma_heap_init(&pool->host_heap, POOL_HEAP_OFFSET, host_mem_size);
    pool->host_only = host_only;
 
    if (descriptor_bo_size > 0) {
@@ -990,8 +991,8 @@ VkResult anv_ResetDescriptorPool(
    }
    list_inithead(&pool->desc_sets);
 
-   pool->next = 0;
-   pool->free_list = EMPTY;
+   util_vma_heap_finish(&pool->host_heap);
+   util_vma_heap_init(&pool->host_heap, POOL_HEAP_OFFSET, pool->host_mem_size);
 
    if (pool->bo) {
       util_vma_heap_finish(&pool->bo_heap);
@@ -1006,57 +1007,37 @@ VkResult anv_ResetDescriptorPool(
    return VK_SUCCESS;
 }
 
-struct pool_free_list_entry {
-   uint32_t next;
-   uint32_t size;
-};
-
 static VkResult
 anv_descriptor_pool_alloc_set(struct anv_descriptor_pool *pool,
                               uint32_t size,
                               struct anv_descriptor_set **set)
 {
-   if (size <= pool->size - pool->next) {
-      *set = (struct anv_descriptor_set *) (pool->data + pool->next);
-      (*set)->size = size;
-      pool->next += size;
-      return VK_SUCCESS;
-   } else {
-      struct pool_free_list_entry *entry;
-      uint32_t *link = &pool->free_list;
-      for (uint32_t f = pool->free_list; f != EMPTY; f = entry->next) {
-         entry = (struct pool_free_list_entry *) (pool->data + f);
-         if (size <= entry->size) {
-            *link = entry->next;
-            *set = (struct anv_descriptor_set *) entry;
-            (*set)->size = entry->size;
-            return VK_SUCCESS;
-         }
-         link = &entry->next;
-      }
+   uint64_t vma_offset = util_vma_heap_alloc(&pool->host_heap, size, 1);
 
-      if (pool->free_list != EMPTY) {
+   if (vma_offset == 0) {
+      if (size <= pool->host_heap.free_size) {
          return VK_ERROR_FRAGMENTED_POOL;
       } else {
         return VK_ERROR_OUT_OF_POOL_MEMORY;
      }
   }
 
+   assert(vma_offset >= POOL_HEAP_OFFSET);
+   uint64_t host_mem_offset = vma_offset - POOL_HEAP_OFFSET;
+
+   *set = (struct anv_descriptor_set *) (pool->host_mem + host_mem_offset);
+   (*set)->size = size;
+
+   return VK_SUCCESS;
 }
 
 static void
 anv_descriptor_pool_free_set(struct anv_descriptor_pool *pool,
                              struct anv_descriptor_set *set)
 {
-   /* Put the descriptor set allocation back on the free list. */
-   const uint32_t index = (char *) set - pool->data;
-   if (index + set->size == pool->next) {
-      pool->next = index;
-   } else {
-      struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set;
-      entry->next = pool->free_list;
-      entry->size = set->size;
-      pool->free_list = (char *) entry - pool->data;
-   }
+   util_vma_heap_free(&pool->host_heap,
+                      ((char *) set - pool->host_mem) + POOL_HEAP_OFFSET,
+                      set->size);
 }
 
 struct surface_state_free_list_entry {
@@ -1891,21 +1891,28 @@ anv_descriptor_set_address(struct anv_descriptor_set *set)
 struct anv_descriptor_pool {
    struct vk_object_base base;
 
-   uint32_t size;
-   uint32_t next;
-   uint32_t free_list;
-
    struct anv_bo *bo;
    struct util_vma_heap bo_heap;
 
    struct anv_state_stream surface_state_stream;
    void *surface_state_free_list;
 
+   /** List of anv_descriptor_set. */
    struct list_head desc_sets;
 
+   /** Heap over host_mem */
+   struct util_vma_heap host_heap;
+
+   /** Allocated size of host_mem */
+   uint32_t host_mem_size;
+
+   /**
+    * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
+    * surface_state_stream is unused.
+    */
    bool host_only;
 
-   char data[0];
+   char host_mem[0];
 };
 
 size_t
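
As background on the POOL_HEAP_OFFSET arithmetic above: util_vma_heap_alloc()
returns 0 on failure, so offset 0 can never be a valid allocation. The patch
therefore initializes the heap at a nonzero base and translates every heap
offset back into an index into the pool's trailing host_mem array. Below is a
hedged sketch of that translation with a hypothetical toy_pool type and an
ILLUSTRATIVE_OFFSET stand-in (the real POOL_HEAP_OFFSET value is not shown in
this excerpt):

#include <assert.h>
#include <stdint.h>
#include "util/vma.h"

/* Hypothetical stand-in for POOL_HEAP_OFFSET, whose real value is defined
 * in the source file but not visible here. Any nonzero value works. */
#define ILLUSTRATIVE_OFFSET 64

struct toy_pool {
   struct util_vma_heap heap;   /* hands out offsets, never pointers */
   char host_mem[1024];         /* actual storage backing the offsets */
};

static void
toy_pool_init(struct toy_pool *pool)
{
   /* Heap range is [ILLUSTRATIVE_OFFSET, ILLUSTRATIVE_OFFSET + size), so a
    * returned offset of 0 unambiguously means "out of space". */
   util_vma_heap_init(&pool->heap, ILLUSTRATIVE_OFFSET, sizeof(pool->host_mem));
}

/* Returns a pointer into host_mem, or NULL when the heap has no room. */
static void *
toy_pool_alloc(struct toy_pool *pool, uint64_t size)
{
   uint64_t vma_offset = util_vma_heap_alloc(&pool->heap, size, 1);
   if (vma_offset == 0)
      return NULL;

   assert(vma_offset >= ILLUSTRATIVE_OFFSET);
   return pool->host_mem + (vma_offset - ILLUSTRATIVE_OFFSET);
}

/* Translates the pointer back into the shifted offset space before
 * returning the block to the heap. */
static void
toy_pool_free(struct toy_pool *pool, void *ptr, uint64_t size)
{
   util_vma_heap_free(&pool->heap,
                      ((char *)ptr - pool->host_mem) + ILLUSTRATIVE_OFFSET,
                      size);
}

int main(void)
{
   static struct toy_pool pool;
   toy_pool_init(&pool);

   void *p = toy_pool_alloc(&pool, 128);
   assert(p != NULL);
   toy_pool_free(&pool, p, 128);

   util_vma_heap_finish(&pool.heap);
   return 0;
}

This mirrors the anv_descriptor_pool_alloc_set()/anv_descriptor_pool_free_set()
pair in the diff, minus the Vulkan error-code mapping.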