From 849d41dbf8314d555b761a3b8d461869f7e4a8f1 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Thu, 16 Oct 2025 16:50:50 +0200
Subject: [PATCH] radv: implement a new descriptor sets allocator

The previous implementation was horribly slow with a large number of
descriptor sets. The new approach uses util_vma_heap (like ANV), which
is a perfect fit.

This fixes stuttering in Indiana Jones because that game seems to use a
huge number of descriptor sets, which can also be freed.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13901
Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/radv_descriptor_pool.c | 18 +++++++--
 src/amd/vulkan/radv_descriptor_pool.h | 16 ++++----
 src/amd/vulkan/radv_descriptor_set.c  | 56 ++++++++-------------------
 src/amd/vulkan/radv_descriptor_set.h  |  3 +-
 4 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/src/amd/vulkan/radv_descriptor_pool.c b/src/amd/vulkan/radv_descriptor_pool.c
index 20892e9d3c2..9640cf6d38b 100644
--- a/src/amd/vulkan/radv_descriptor_pool.c
+++ b/src/amd/vulkan/radv_descriptor_pool.c
@@ -20,8 +20,8 @@ static void
 radv_destroy_descriptor_pool_entries(struct radv_device *device, struct radv_descriptor_pool *pool)
 {
    if (!pool->host_memory_base) {
-      for (uint32_t i = 0; i < pool->entry_count; ++i) {
-         radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
+      list_for_each_entry_safe (struct radv_descriptor_set, set, &pool->sets, link) {
+         radv_descriptor_set_destroy(device, pool, set);
       }
    } else {
       list_for_each_entry_safe (struct radv_descriptor_set, set, &pool->sets, link) {
@@ -38,6 +38,8 @@ radv_destroy_descriptor_pool(struct radv_device *device, const VkAllocationCallb
 {
    radv_destroy_descriptor_pool_entries(device, pool);
 
+   if (!pool->host_memory_base && pool->size)
+      util_vma_heap_finish(&pool->bo_heap);
    if (pool->bo)
       radv_bo_destroy(device, &pool->base, pool->bo);
    if (pool->host_bo)
@@ -146,8 +148,6 @@ radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCr
       size += pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
       size += sizeof(struct radeon_winsys_bo *) * bo_count;
       size += sizeof(struct radv_descriptor_range) * range_count;
-   } else {
-      size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
    }
 
    pool = vk_zalloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -162,6 +162,11 @@ radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCr
       pool->host_memory_base = (uint8_t *)pool + sizeof(struct radv_descriptor_pool);
       pool->host_memory_ptr = pool->host_memory_base;
       pool->host_memory_end = (uint8_t *)pool + size;
+   } else {
+      if (bo_size) {
+         util_vma_heap_init(&pool->bo_heap, RADV_POOL_HEAP_OFFSET, bo_size + RADV_POOL_HEAP_OFFSET);
+         pool->bo_heap.alloc_high = false;
+      }
    }
 
    if (bo_size) {
@@ -231,6 +236,11 @@ radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool, VkDe
 
    radv_destroy_descriptor_pool_entries(device, pool);
 
+   if (!pool->host_memory_base && pool->size) {
+      util_vma_heap_finish(&pool->bo_heap);
+      util_vma_heap_init(&pool->bo_heap, RADV_POOL_HEAP_OFFSET, pool->size + RADV_POOL_HEAP_OFFSET);
+   }
+
    pool->entry_count = 0;
    pool->current_offset = 0;
    pool->host_memory_ptr = pool->host_memory_base;
diff --git a/src/amd/vulkan/radv_descriptor_pool.h b/src/amd/vulkan/radv_descriptor_pool.h
index 295d6f4236a..cb79545f38e 100644
--- a/src/amd/vulkan/radv_descriptor_pool.h
+++ b/src/amd/vulkan/radv_descriptor_pool.h
@@ -12,15 +12,15 @@
 #include <vulkan/vulkan.h>
 
 #include "util/list.h"
+#include "util/vma.h"
+
+/* The vma heap reserves 0 to mean NULL; we have to offset by some amount to ensure we can allocate
+ * the entire BO without hitting zero. The actual amount doesn't matter.
+ */
+#define RADV_POOL_HEAP_OFFSET 32
 
 struct radv_descriptor_set;
 
-struct radv_descriptor_pool_entry {
-   uint32_t offset;
-   uint32_t size;
-   struct radv_descriptor_set *set;
-};
-
 struct radv_descriptor_pool {
    struct vk_object_base base;
    struct radeon_winsys_bo *bo;
@@ -35,10 +35,10 @@ struct radv_descriptor_pool {
 
    struct list_head sets;
 
+   struct util_vma_heap bo_heap;
+
    uint32_t entry_count;
    uint32_t max_entry_count;
-
-   struct radv_descriptor_pool_entry entries[0];
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL)
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index 4362572f25e..bee011f60b6 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -377,47 +377,29 @@ radv_alloc_descriptor_pool_entry(struct radv_device *device, struct radv_descrip
                                  struct radv_descriptor_set *set)
 {
    uint64_t current_offset = pool->current_offset;
-   uint32_t entry_index = pool->entry_count;
 
    if (!pool->host_memory_base) {
-      /* Try to allocate linearly first, so that we don't spend time looking for gaps if the app
-       * only allocates & resets via the pool. */
-      if (current_offset + set->header.size <= pool->size) {
-         pool->current_offset += set->header.size;
-      } else {
-         current_offset = 0;
+      if (set->header.size) {
+         uint64_t pool_vma_offset = util_vma_heap_alloc(&pool->bo_heap, set->header.size, 32);
+         if (!pool_vma_offset)
+            return VK_ERROR_FRAGMENTED_POOL;
 
-         for (entry_index = 0; entry_index < pool->entry_count; ++entry_index) {
-            if (pool->entries[entry_index].offset - current_offset >= set->header.size)
-               break;
-            current_offset = pool->entries[entry_index].offset + pool->entries[entry_index].size;
-         }
-
-         if (pool->size - current_offset < set->header.size) {
-            vk_free2(&device->vk.alloc, NULL, set);
-            return VK_ERROR_OUT_OF_POOL_MEMORY;
-         }
-
-         memmove(&pool->entries[entry_index + 1], &pool->entries[entry_index],
-                 sizeof(pool->entries[0]) * (pool->entry_count - entry_index));
+         assert(pool_vma_offset >= RADV_POOL_HEAP_OFFSET && pool_vma_offset <= pool->size + RADV_POOL_HEAP_OFFSET);
+         set->header.offset = pool_vma_offset - RADV_POOL_HEAP_OFFSET;
+         current_offset = set->header.offset;
       }
-
-      pool->entries[entry_index].offset = current_offset;
-      pool->entries[entry_index].size = set->header.size;
-      pool->entries[entry_index].set = set;
   } else {
       if (current_offset + set->header.size > pool->size)
         return VK_ERROR_OUT_OF_POOL_MEMORY;
 
       pool->current_offset += set->header.size;
-
-      list_addtail(&set->link, &pool->sets);
    }
 
    set->header.bo = pool->bo;
    set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + current_offset);
    set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + current_offset) : 0;
+   list_addtail(&set->link, &pool->sets);
    pool->entry_count++;
 
    return VK_SUCCESS;
 }
@@ -439,8 +421,8 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po
       unsigned stride = radv_descriptor_type_buffer_count(layout->binding[layout->binding_count - 1].type);
       buffer_count = layout->binding[layout->binding_count - 1].buffer_offset + variable_count * stride;
    }
-   unsigned range_offset = sizeof(struct radv_descriptor_set_header) + sizeof(struct list_head) +
-                           sizeof(struct radeon_winsys_bo *) * buffer_count;
+   unsigned range_offset =
+      offsetof(struct radv_descriptor_set, descriptors) + sizeof(struct radeon_winsys_bo *) * buffer_count;
 
    const unsigned dynamic_offset_count = layout->dynamic_offset_count;
    unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * dynamic_offset_count;
@@ -507,21 +489,17 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po
 
 void
 radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
-                            struct radv_descriptor_set *set, bool free_bo)
+                            struct radv_descriptor_set *set)
 {
    assert(!pool->host_memory_base);
 
+   list_del(&set->link);
    vk_descriptor_set_layout_unref(&device->vk, &set->header.layout->vk);
 
-   if (free_bo) {
-      for (int i = 0; i < pool->entry_count; ++i) {
-         if (pool->entries[i].set == set) {
-            memmove(&pool->entries[i], &pool->entries[i + 1], sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
-            --pool->entry_count;
-            break;
-         }
-      }
-   }
+   if (set->header.size)
+      util_vma_heap_free(&pool->bo_heap, (uint64_t)set->header.offset + RADV_POOL_HEAP_OFFSET, set->header.size);
+   pool->entry_count--;
+
    vk_object_base_finish(&set->header.base);
    vk_free2(&device->vk.alloc, NULL, set);
 }
@@ -578,7 +556,7 @@ radv_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, uint3
       VK_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
 
       if (set && !pool->host_memory_base)
-         radv_descriptor_set_destroy(device, pool, set, true);
+         radv_descriptor_set_destroy(device, pool, set);
    }
    return VK_SUCCESS;
 }
diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h
index 1a8eb0c101d..344bdc2b957 100644
--- a/src/amd/vulkan/radv_descriptor_set.h
+++ b/src/amd/vulkan/radv_descriptor_set.h
@@ -90,6 +90,7 @@ struct radv_descriptor_range {
 
 struct radv_descriptor_set_header {
    struct vk_object_base base;
    struct radv_descriptor_set_layout *layout;
+   uint32_t offset;
    uint32_t size;
    uint32_t buffer_count;
@@ -136,6 +137,6 @@ void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd
                                      const VkCopyDescriptorSet *pDescriptorCopies);
 
 void radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
-                                 struct radv_descriptor_set *set, bool free_bo);
+                                 struct radv_descriptor_set *set);
 
 #endif /* RADV_DESCRIPTOR_SET_H */
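
Summary note (illustrative, not part of the patch): the heap's lifetime now follows the pool's.
Roughly, vkCreateDescriptorPool does util_vma_heap_init(), vkAllocateDescriptorSets maps to
util_vma_heap_alloc() with 32-byte alignment, vkFreeDescriptorSets maps to util_vma_heap_free()
using the offset now stored in radv_descriptor_set_header, vkResetDescriptorPool rebuilds the
heap with finish + init, and vkDestroyDescriptorPool calls util_vma_heap_finish(). Host-memory
pools keep the old linear current_offset path and never touch the heap.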