radv: implement a new descriptor sets allocator

The previous implementation was horribly slow with a large number
of descriptor sets.

The new approach uses util_vma_heap (like ANV) which is a perfect fit.
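
For reference, the util_vma_heap lifecycle looks roughly like this (a minimal
standalone sketch against src/util/vma.h; the sizes and the function name are
made up for illustration, not taken from this patch):

   #include "util/vma.h"

   static void
   vma_heap_example(void)
   {
      struct util_vma_heap heap;

      /* Manage a 64 KiB range starting at address 32; util_vma_heap_alloc()
       * returns 0 on failure, so the range must not contain address 0. */
      util_vma_heap_init(&heap, 32, 64 * 1024);
      heap.alloc_high = false; /* hand out addresses bottom-up */

      uint64_t addr = util_vma_heap_alloc(&heap, 4096, 32);
      if (addr)
         util_vma_heap_free(&heap, addr, 4096); /* frees may come in any order */

      util_vma_heap_finish(&heap);
   }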

This fixes stuttering in Indiana Jones because that game seems to use
a huge number of descriptor sets and also frees them.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13901
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37976>
Samuel Pitoiset, 2025-10-16 16:50:50 +02:00 (committed by Marge Bot)
parent 9b6d89f4ca
commit 849d41dbf8
4 changed files with 41 additions and 52 deletions


@@ -20,8 +20,8 @@ static void
 radv_destroy_descriptor_pool_entries(struct radv_device *device, struct radv_descriptor_pool *pool)
 {
    if (!pool->host_memory_base) {
-      for (uint32_t i = 0; i < pool->entry_count; ++i) {
-         radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
+      list_for_each_entry_safe (struct radv_descriptor_set, set, &pool->sets, link) {
+         radv_descriptor_set_destroy(device, pool, set);
       }
    } else {
       list_for_each_entry_safe (struct radv_descriptor_set, set, &pool->sets, link) {
@@ -38,6 +38,8 @@ radv_destroy_descriptor_pool(struct radv_device *device, const VkAllocationCallb
 {
    radv_destroy_descriptor_pool_entries(device, pool);
 
+   if (!pool->host_memory_base && pool->size)
+      util_vma_heap_finish(&pool->bo_heap);
    if (pool->bo)
       radv_bo_destroy(device, &pool->base, pool->bo);
    if (pool->host_bo)
@@ -146,8 +148,6 @@ radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCr
       size += pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
       size += sizeof(struct radeon_winsys_bo *) * bo_count;
       size += sizeof(struct radv_descriptor_range) * range_count;
-   } else {
-      size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
    }
 
    pool = vk_zalloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -162,6 +162,11 @@ radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCr
       pool->host_memory_base = (uint8_t *)pool + sizeof(struct radv_descriptor_pool);
       pool->host_memory_ptr = pool->host_memory_base;
       pool->host_memory_end = (uint8_t *)pool + size;
+   } else {
+      if (bo_size) {
+         util_vma_heap_init(&pool->bo_heap, RADV_POOL_HEAP_OFFSET, bo_size + RADV_POOL_HEAP_OFFSET);
+         pool->bo_heap.alloc_high = false;
+      }
    }
 
    if (bo_size) {
@@ -231,6 +236,11 @@ radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool, VkDe
    radv_destroy_descriptor_pool_entries(device, pool);
 
+   if (!pool->host_memory_base && pool->size) {
+      util_vma_heap_finish(&pool->bo_heap);
+      util_vma_heap_init(&pool->bo_heap, RADV_POOL_HEAP_OFFSET, pool->size + RADV_POOL_HEAP_OFFSET);
+   }
+
    pool->entry_count = 0;
    pool->current_offset = 0;
    pool->host_memory_ptr = pool->host_memory_base;


@@ -12,15 +12,15 @@
 #include <vulkan/vulkan.h>
 
 #include "util/list.h"
+#include "util/vma.h"
+
+/* The vma heap reserves 0 to mean NULL; we have to offset by some amount to ensure we can allocate
+ * the entire BO without hitting zero. The actual amount doesn't matter.
+ */
+#define RADV_POOL_HEAP_OFFSET 32
 
 struct radv_descriptor_set;
 
-struct radv_descriptor_pool_entry {
-   uint32_t offset;
-   uint32_t size;
-   struct radv_descriptor_set *set;
-};
-
 struct radv_descriptor_pool {
    struct vk_object_base base;
    struct radeon_winsys_bo *bo;
@@ -35,10 +35,10 @@ struct radv_descriptor_pool {
 
    struct list_head sets;
 
+   struct util_vma_heap bo_heap;
+
    uint32_t entry_count;
    uint32_t max_entry_count;
-
-   struct radv_descriptor_pool_entry entries[0];
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL)
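
The RADV_POOL_HEAP_OFFSET comment above is the key invariant: util_vma_heap_alloc()
returns 0 to signal failure, so the heap range is biased by 32 and the first byte of
the BO maps to a non-zero heap address. A sketch of the resulting translation (the
helper name and the UINT32_MAX sentinel are illustrative, not RADV code):

   #include <stdint.h>
   #include "util/vma.h"

   #define RADV_POOL_HEAP_OFFSET 32

   /* Hypothetical helper: returns a byte offset into the descriptor BO, or
    * UINT32_MAX when the heap cannot satisfy the request. */
   static uint32_t
   pool_alloc_offset(struct util_vma_heap *heap, uint32_t size)
   {
      uint64_t vma = util_vma_heap_alloc(heap, size, 32);
      if (!vma)
         return UINT32_MAX;

      /* Undo the bias: heap address 32 is byte 0 of the BO. */
      return vma - RADV_POOL_HEAP_OFFSET;
   }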


@@ -377,47 +377,29 @@ radv_alloc_descriptor_pool_entry(struct radv_device *device, struct radv_descrip
                                  struct radv_descriptor_set *set)
 {
    uint64_t current_offset = pool->current_offset;
-   uint32_t entry_index = pool->entry_count;
 
    if (!pool->host_memory_base) {
-      /* Try to allocate linearly first, so that we don't spend time looking for gaps if the app
-       * only allocates & resets via the pool. */
-      if (current_offset + set->header.size <= pool->size) {
-         pool->current_offset += set->header.size;
-      } else {
-         current_offset = 0;
-
-         for (entry_index = 0; entry_index < pool->entry_count; ++entry_index) {
-            if (pool->entries[entry_index].offset - current_offset >= set->header.size)
-               break;
-            current_offset = pool->entries[entry_index].offset + pool->entries[entry_index].size;
-         }
-
-         if (pool->size - current_offset < set->header.size) {
-            vk_free2(&device->vk.alloc, NULL, set);
-            return VK_ERROR_OUT_OF_POOL_MEMORY;
-         }
-
-         memmove(&pool->entries[entry_index + 1], &pool->entries[entry_index],
-                 sizeof(pool->entries[0]) * (pool->entry_count - entry_index));
+      if (set->header.size) {
+         uint64_t pool_vma_offset = util_vma_heap_alloc(&pool->bo_heap, set->header.size, 32);
+         if (!pool_vma_offset)
+            return VK_ERROR_FRAGMENTED_POOL;
+
+         assert(pool_vma_offset >= RADV_POOL_HEAP_OFFSET && pool_vma_offset <= pool->size + RADV_POOL_HEAP_OFFSET);
+         set->header.offset = pool_vma_offset - RADV_POOL_HEAP_OFFSET;
+         current_offset = set->header.offset;
       }
-
-      pool->entries[entry_index].offset = current_offset;
-      pool->entries[entry_index].size = set->header.size;
-      pool->entries[entry_index].set = set;
    } else {
       if (current_offset + set->header.size > pool->size)
          return VK_ERROR_OUT_OF_POOL_MEMORY;
       pool->current_offset += set->header.size;
-      list_addtail(&set->link, &pool->sets);
    }
 
    set->header.bo = pool->bo;
    set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + current_offset);
    set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + current_offset) : 0;
 
+   list_addtail(&set->link, &pool->sets);
+
    pool->entry_count++;
    return VK_SUCCESS;
 }
@@ -439,8 +421,8 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po
       unsigned stride = radv_descriptor_type_buffer_count(layout->binding[layout->binding_count - 1].type);
       buffer_count = layout->binding[layout->binding_count - 1].buffer_offset + variable_count * stride;
    }
-   unsigned range_offset = sizeof(struct radv_descriptor_set_header) + sizeof(struct list_head) +
-                           sizeof(struct radeon_winsys_bo *) * buffer_count;
+   unsigned range_offset =
+      offsetof(struct radv_descriptor_set, descriptors) + sizeof(struct radeon_winsys_bo *) * buffer_count;
 
    const unsigned dynamic_offset_count = layout->dynamic_offset_count;
    unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * dynamic_offset_count;
@@ -507,21 +489,17 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po
 void
 radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
-                            struct radv_descriptor_set *set, bool free_bo)
+                            struct radv_descriptor_set *set)
 {
    assert(!pool->host_memory_base);
 
+   list_del(&set->link);
+
    vk_descriptor_set_layout_unref(&device->vk, &set->header.layout->vk);
 
-   if (free_bo) {
-      for (int i = 0; i < pool->entry_count; ++i) {
-         if (pool->entries[i].set == set) {
-            memmove(&pool->entries[i], &pool->entries[i + 1], sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
-            --pool->entry_count;
-            break;
-         }
-      }
-   }
+   if (set->header.size)
+      util_vma_heap_free(&pool->bo_heap, (uint64_t)set->header.offset + RADV_POOL_HEAP_OFFSET, set->header.size);
+
+   pool->entry_count--;
 
    vk_object_base_finish(&set->header.base);
    vk_free2(&device->vk.alloc, NULL, set);
 }
@@ -578,7 +556,7 @@ radv_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, uint3
       VK_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
 
       if (set && !pool->host_memory_base)
-         radv_descriptor_set_destroy(device, pool, set, true);
+         radv_descriptor_set_destroy(device, pool, set);
    }
    return VK_SUCCESS;
 }
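
The alloc path (bias subtracted when storing set->header.offset) and the destroy
path (bias re-applied before util_vma_heap_free()) have to stay exact mirrors,
otherwise the heap would leak or corrupt ranges. A self-contained round-trip
sketch of that pairing, written as a standalone test rather than RADV code:

   #include <assert.h>
   #include <stdint.h>
   #include "util/vma.h"

   #define RADV_POOL_HEAP_OFFSET 32

   int
   main(void)
   {
      struct util_vma_heap heap;
      util_vma_heap_init(&heap, RADV_POOL_HEAP_OFFSET, 4096 + RADV_POOL_HEAP_OFFSET);
      heap.alloc_high = false;

      /* Allocate the way radv_alloc_descriptor_pool_entry() does... */
      uint64_t vma = util_vma_heap_alloc(&heap, 256, 32);
      assert(vma >= RADV_POOL_HEAP_OFFSET);
      uint32_t bo_offset = vma - RADV_POOL_HEAP_OFFSET;

      /* ...and free the way radv_descriptor_set_destroy() does. */
      util_vma_heap_free(&heap, (uint64_t)bo_offset + RADV_POOL_HEAP_OFFSET, 256);

      util_vma_heap_finish(&heap);
      return 0;
   }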


@@ -90,6 +90,7 @@ struct radv_descriptor_range {
 
 struct radv_descriptor_set_header {
    struct vk_object_base base;
    struct radv_descriptor_set_layout *layout;
+   uint32_t offset;
    uint32_t size;
    uint32_t buffer_count;
@@ -136,6 +137,6 @@ void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd
                                      const VkCopyDescriptorSet *pDescriptorCopies);
 
 void radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
-                                 struct radv_descriptor_set *set, bool free_bo);
+                                 struct radv_descriptor_set *set);
 
 #endif /* RADV_DESCRIPTOR_SET_H */