mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 17:50:12 +01:00
anv: Use vma_heap for descriptor pool host allocation
Pre-patch, anv_descriptor_pool used a free list for host allocations
that never merged adjacent free blocks. If the pool only allocated
fixed-sized blocks, then this would not be a problem. But the pool
allocations are variable-sized, and this caused over half of the pool's
memory to be consumed by unusable free blocks in some workloads, causing
unnecessary memory footprint.
Replacing the free list with util_vma_heap, which does merge adjacent
free blocks, fixes the memory explosion in the target workload.
Disdavantges of util_vma_heap compared to the free list:
- The heap calls malloc() when a new hole is created.
- The heap calls free() when a hole disappears or is merged with an
adjacent hole.
- The Vulkan spec expects descriptor set creation/destruction to be
thread-local lockless in the common case. For workloads that
create/destroy with high frequency, malloc/free may cause overhead.
Profiling is needed.
Tested with a ChromeOS internal TensorFlow benchmark, provided by
package 'tensorflow', running with its OpenCL backend on clvk.
cmdline: benchmark_model --graph=mn2.tflite --use_gpu=true --min_secs=60
gpu: adl
memory footprint from start of benchmark:
before: init=132.691MB max=227.684MB
after: init=134.988MB max=134.988MB
Reported-by: Romaric Jodin <rjodin@google.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20289>
This commit is contained in:
parent
94a6384f1b
commit
a5f9e59ce3
2 changed files with 37 additions and 49 deletions
|
|
@ -806,13 +806,13 @@ void anv_DestroyPipelineLayout(
|
|||
/*
|
||||
* Descriptor pools.
|
||||
*
|
||||
* These are implemented using a big pool of memory and a free-list for the
|
||||
* These are implemented using a big pool of memory and a vma heap for the
|
||||
* host memory allocations and a state_stream and a free list for the buffer
|
||||
* view surface state. The spec allows us to fail to allocate due to
|
||||
* fragmentation in all cases but two: 1) after pool reset, allocating up
|
||||
* until the pool size with no freeing must succeed and 2) allocating and
|
||||
* freeing only descriptor sets with the same layout. Case 1) is easy enough,
|
||||
* and the free lists lets us recycle blocks for case 2).
|
||||
* and the vma heap ensures case 2).
|
||||
*/
|
||||
|
||||
/* The vma heap reserves 0 to mean NULL; we have to offset by some amount to
|
||||
|
|
@ -897,25 +897,26 @@ VkResult anv_CreateDescriptorPool(
|
|||
|
||||
const bool host_only =
|
||||
pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT;
|
||||
|
||||
/* For host_only pools, allocate some memory to hold the written surface
|
||||
* states of the internal anv_buffer_view. With normal pools, the memory
|
||||
* holding surface state is allocated from the device surface_state_pool.
|
||||
*/
|
||||
const size_t pool_size =
|
||||
const size_t host_mem_size =
|
||||
pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
|
||||
descriptor_count * sizeof(struct anv_descriptor) +
|
||||
buffer_view_count * sizeof(struct anv_buffer_view) +
|
||||
(host_only ? buffer_view_count * ANV_SURFACE_STATE_SIZE : 0);
|
||||
const size_t total_size = sizeof(*pool) + pool_size;
|
||||
|
||||
pool = vk_object_alloc(&device->vk, pAllocator, total_size,
|
||||
pool = vk_object_alloc(&device->vk, pAllocator,
|
||||
sizeof(*pool) + host_mem_size,
|
||||
VK_OBJECT_TYPE_DESCRIPTOR_POOL);
|
||||
if (!pool)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
pool->size = pool_size;
|
||||
pool->next = 0;
|
||||
pool->free_list = EMPTY;
|
||||
pool->host_mem_size = host_mem_size;
|
||||
util_vma_heap_init(&pool->host_heap, POOL_HEAP_OFFSET, host_mem_size);
|
||||
|
||||
pool->host_only = host_only;
|
||||
|
||||
if (descriptor_bo_size > 0) {
|
||||
|
|
@ -990,8 +991,8 @@ VkResult anv_ResetDescriptorPool(
|
|||
}
|
||||
list_inithead(&pool->desc_sets);
|
||||
|
||||
pool->next = 0;
|
||||
pool->free_list = EMPTY;
|
||||
util_vma_heap_finish(&pool->host_heap);
|
||||
util_vma_heap_init(&pool->host_heap, POOL_HEAP_OFFSET, pool->host_mem_size);
|
||||
|
||||
if (pool->bo) {
|
||||
util_vma_heap_finish(&pool->bo_heap);
|
||||
|
|
@ -1006,57 +1007,37 @@ VkResult anv_ResetDescriptorPool(
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct pool_free_list_entry {
|
||||
uint32_t next;
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
static VkResult
|
||||
anv_descriptor_pool_alloc_set(struct anv_descriptor_pool *pool,
|
||||
uint32_t size,
|
||||
struct anv_descriptor_set **set)
|
||||
{
|
||||
if (size <= pool->size - pool->next) {
|
||||
*set = (struct anv_descriptor_set *) (pool->data + pool->next);
|
||||
(*set)->size = size;
|
||||
pool->next += size;
|
||||
return VK_SUCCESS;
|
||||
} else {
|
||||
struct pool_free_list_entry *entry;
|
||||
uint32_t *link = &pool->free_list;
|
||||
for (uint32_t f = pool->free_list; f != EMPTY; f = entry->next) {
|
||||
entry = (struct pool_free_list_entry *) (pool->data + f);
|
||||
if (size <= entry->size) {
|
||||
*link = entry->next;
|
||||
*set = (struct anv_descriptor_set *) entry;
|
||||
(*set)->size = entry->size;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
link = &entry->next;
|
||||
}
|
||||
uint64_t vma_offset = util_vma_heap_alloc(&pool->host_heap, size, 1);
|
||||
|
||||
if (pool->free_list != EMPTY) {
|
||||
if (vma_offset == 0) {
|
||||
if (size <= pool->host_heap.free_size) {
|
||||
return VK_ERROR_FRAGMENTED_POOL;
|
||||
} else {
|
||||
return VK_ERROR_OUT_OF_POOL_MEMORY;
|
||||
}
|
||||
}
|
||||
|
||||
assert(vma_offset >= POOL_HEAP_OFFSET);
|
||||
uint64_t host_mem_offset = vma_offset - POOL_HEAP_OFFSET;
|
||||
|
||||
*set = (struct anv_descriptor_set *) (pool->host_mem + host_mem_offset);
|
||||
(*set)->size = size;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_descriptor_pool_free_set(struct anv_descriptor_pool *pool,
|
||||
struct anv_descriptor_set *set)
|
||||
{
|
||||
/* Put the descriptor set allocation back on the free list. */
|
||||
const uint32_t index = (char *) set - pool->data;
|
||||
if (index + set->size == pool->next) {
|
||||
pool->next = index;
|
||||
} else {
|
||||
struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set;
|
||||
entry->next = pool->free_list;
|
||||
entry->size = set->size;
|
||||
pool->free_list = (char *) entry - pool->data;
|
||||
}
|
||||
util_vma_heap_free(&pool->host_heap,
|
||||
((char *) set - pool->host_mem) + POOL_HEAP_OFFSET,
|
||||
set->size);
|
||||
}
|
||||
|
||||
struct surface_state_free_list_entry {
|
||||
|
|
|
|||
|
|
@ -1891,21 +1891,28 @@ anv_descriptor_set_address(struct anv_descriptor_set *set)
|
|||
struct anv_descriptor_pool {
|
||||
struct vk_object_base base;
|
||||
|
||||
uint32_t size;
|
||||
uint32_t next;
|
||||
uint32_t free_list;
|
||||
|
||||
struct anv_bo *bo;
|
||||
struct util_vma_heap bo_heap;
|
||||
|
||||
struct anv_state_stream surface_state_stream;
|
||||
void *surface_state_free_list;
|
||||
|
||||
/** List of anv_descriptor_set. */
|
||||
struct list_head desc_sets;
|
||||
|
||||
/** Heap over host_mem */
|
||||
struct util_vma_heap host_heap;
|
||||
|
||||
/** Allocated size of host_mem */
|
||||
uint32_t host_mem_size;
|
||||
|
||||
/**
|
||||
* VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
|
||||
* surface_state_stream is unused.
|
||||
*/
|
||||
bool host_only;
|
||||
|
||||
char data[0];
|
||||
char host_mem[0];
|
||||
};
|
||||
|
||||
size_t
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue