radv: implement a new descriptor sets allocator

The previous implementation was horribly slow with a large number
of descriptor sets.

The new approach uses util_vma_heap (like ANV), which is a perfect fit.

This fixes stuttering in Indiana Jones because that game seems to use
a huge number of descriptor sets which can also be freed.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13901
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37976>
Samuel Pitoiset 2025-10-16 16:50:50 +02:00 committed by Marge Bot
parent 9b6d89f4ca
commit 849d41dbf8
4 changed files with 41 additions and 52 deletions
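
For context, the allocation pattern the diffs below adopt boils down to the following minimal sketch, written against the util_vma_heap API in src/util/vma.h. The toy_pool type, function names, and TOY_POOL_HEAP_OFFSET are illustrative assumptions, not the committed RADV code; see the actual hunks for the real integration.

/* A toy sub-allocator over a single descriptor BO. util_vma_heap_alloc()
 * returns 0 on failure, so the heap is biased by a small nonzero constant
 * (the same trick as RADV_POOL_HEAP_OFFSET below) to keep BO offset 0
 * allocatable. */

#include <stdbool.h>
#include <stdint.h>

#include "util/vma.h"

#define TOY_POOL_HEAP_OFFSET 32 /* arbitrary nonzero bias, hypothetical name */

struct toy_pool {
   struct util_vma_heap heap;
   uint64_t size; /* size of the backing BO in bytes */
};

static void
toy_pool_init(struct toy_pool *pool, uint64_t bo_size)
{
   pool->size = bo_size;
   util_vma_heap_init(&pool->heap, TOY_POOL_HEAP_OFFSET, bo_size);
   /* Prefer low addresses so an app that only allocates and resets fills
    * the BO linearly, like the old allocator's fast path. */
   pool->heap.alloc_high = false;
}

/* Returns false when the pool is full or too fragmented for the request,
 * which maps to VK_ERROR_FRAGMENTED_POOL in the diff below. */
static bool
toy_pool_alloc(struct toy_pool *pool, uint64_t size, uint64_t align, uint64_t *out_offset)
{
   uint64_t addr = util_vma_heap_alloc(&pool->heap, size, align);
   if (!addr)
      return false;
   *out_offset = addr - TOY_POOL_HEAP_OFFSET; /* real offset into the BO */
   return true;
}

static void
toy_pool_free(struct toy_pool *pool, uint64_t offset, uint64_t size)
{
   util_vma_heap_free(&pool->heap, offset + TOY_POOL_HEAP_OFFSET, size);
}

Compared to the removed entries[] scheme, the heap tracks free ranges rather than every live set, so allocation and free cost scales with fragmentation instead of with the number of live descriptor sets, which is exactly the pathological case behind the stutter.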


@@ -20,8 +20,8 @@ static void
 radv_destroy_descriptor_pool_entries(struct radv_device *device, struct radv_descriptor_pool *pool)
 {
    if (!pool->host_memory_base) {
-      for (uint32_t i = 0; i < pool->entry_count; ++i) {
-         radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
+      list_for_each_entry_safe (struct radv_descriptor_set, set, &pool->sets, link) {
+         radv_descriptor_set_destroy(device, pool, set);
       }
    } else {
       list_for_each_entry_safe (struct radv_descriptor_set, set, &pool->sets, link) {
@@ -38,6 +38,8 @@ radv_destroy_descriptor_pool(struct radv_device *device, const VkAllocationCallb
 {
    radv_destroy_descriptor_pool_entries(device, pool);
 
+   if (!pool->host_memory_base && pool->size)
+      util_vma_heap_finish(&pool->bo_heap);
    if (pool->bo)
       radv_bo_destroy(device, &pool->base, pool->bo);
    if (pool->host_bo)
@@ -146,8 +148,6 @@ radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCr
       size += pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
       size += sizeof(struct radeon_winsys_bo *) * bo_count;
       size += sizeof(struct radv_descriptor_range) * range_count;
-   } else {
-      size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
    }
 
    pool = vk_zalloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -162,6 +162,11 @@ radv_create_descriptor_pool(struct radv_device *device, const VkDescriptorPoolCr
       pool->host_memory_base = (uint8_t *)pool + sizeof(struct radv_descriptor_pool);
       pool->host_memory_ptr = pool->host_memory_base;
       pool->host_memory_end = (uint8_t *)pool + size;
+   } else {
+      if (bo_size) {
+         util_vma_heap_init(&pool->bo_heap, RADV_POOL_HEAP_OFFSET, bo_size + RADV_POOL_HEAP_OFFSET);
+         pool->bo_heap.alloc_high = false;
+      }
    }
 
    if (bo_size) {
@@ -231,6 +236,11 @@ radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool, VkDe
    radv_destroy_descriptor_pool_entries(device, pool);
 
+   if (!pool->host_memory_base && pool->size) {
+      util_vma_heap_finish(&pool->bo_heap);
+      util_vma_heap_init(&pool->bo_heap, RADV_POOL_HEAP_OFFSET, pool->size + RADV_POOL_HEAP_OFFSET);
+   }
+
    pool->entry_count = 0;
    pool->current_offset = 0;
    pool->host_memory_ptr = pool->host_memory_base;


@@ -12,15 +12,15 @@
 #include <vulkan/vulkan.h>
 
 #include "util/list.h"
+#include "util/vma.h"
 
+/* The vma heap reserves 0 to mean NULL; we have to offset by some amount to ensure we can allocate
+ * the entire BO without hitting zero. The actual amount doesn't matter.
+ */
+#define RADV_POOL_HEAP_OFFSET 32
+
 struct radv_descriptor_set;
 
-struct radv_descriptor_pool_entry {
-   uint32_t offset;
-   uint32_t size;
-   struct radv_descriptor_set *set;
-};
-
 struct radv_descriptor_pool {
    struct vk_object_base base;
    struct radeon_winsys_bo *bo;
@@ -35,10 +35,10 @@ struct radv_descriptor_pool {
    struct list_head sets;
 
+   struct util_vma_heap bo_heap;
+
    uint32_t entry_count;
-   uint32_t max_entry_count;
-
-   struct radv_descriptor_pool_entry entries[0];
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL)


@@ -377,47 +377,29 @@ radv_alloc_descriptor_pool_entry(struct radv_device *device, struct radv_descrip
                                  struct radv_descriptor_set *set)
 {
    uint64_t current_offset = pool->current_offset;
-   uint32_t entry_index = pool->entry_count;
 
    if (!pool->host_memory_base) {
-      /* Try to allocate linearly first, so that we don't spend time looking for gaps if the app
-       * only allocates & resets via the pool. */
-      if (current_offset + set->header.size <= pool->size) {
-         pool->current_offset += set->header.size;
-      } else {
-         current_offset = 0;
+      if (set->header.size) {
+         uint64_t pool_vma_offset = util_vma_heap_alloc(&pool->bo_heap, set->header.size, 32);
+         if (!pool_vma_offset)
+            return VK_ERROR_FRAGMENTED_POOL;
 
-         for (entry_index = 0; entry_index < pool->entry_count; ++entry_index) {
-            if (pool->entries[entry_index].offset - current_offset >= set->header.size)
-               break;
-            current_offset = pool->entries[entry_index].offset + pool->entries[entry_index].size;
-         }
-
-         if (pool->size - current_offset < set->header.size) {
-            vk_free2(&device->vk.alloc, NULL, set);
-            return VK_ERROR_OUT_OF_POOL_MEMORY;
-         }
-
-         memmove(&pool->entries[entry_index + 1], &pool->entries[entry_index],
-                 sizeof(pool->entries[0]) * (pool->entry_count - entry_index));
+         assert(pool_vma_offset >= RADV_POOL_HEAP_OFFSET && pool_vma_offset <= pool->size + RADV_POOL_HEAP_OFFSET);
+         set->header.offset = pool_vma_offset - RADV_POOL_HEAP_OFFSET;
+         current_offset = set->header.offset;
       }
-
-      pool->entries[entry_index].offset = current_offset;
-      pool->entries[entry_index].size = set->header.size;
-      pool->entries[entry_index].set = set;
    } else {
       if (current_offset + set->header.size > pool->size)
          return VK_ERROR_OUT_OF_POOL_MEMORY;
 
       pool->current_offset += set->header.size;
-      list_addtail(&set->link, &pool->sets);
    }
 
    set->header.bo = pool->bo;
    set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + current_offset);
    set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + current_offset) : 0;
 
+   list_addtail(&set->link, &pool->sets);
    pool->entry_count++;
 
    return VK_SUCCESS;
 }
@@ -439,8 +421,8 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po
       unsigned stride = radv_descriptor_type_buffer_count(layout->binding[layout->binding_count - 1].type);
       buffer_count = layout->binding[layout->binding_count - 1].buffer_offset + variable_count * stride;
    }
-   unsigned range_offset = sizeof(struct radv_descriptor_set_header) + sizeof(struct list_head) +
-                           sizeof(struct radeon_winsys_bo *) * buffer_count;
+   unsigned range_offset =
+      offsetof(struct radv_descriptor_set, descriptors) + sizeof(struct radeon_winsys_bo *) * buffer_count;
 
    const unsigned dynamic_offset_count = layout->dynamic_offset_count;
    unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * dynamic_offset_count;
@@ -507,21 +489,17 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_po
 void
 radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
-                            struct radv_descriptor_set *set, bool free_bo)
+                            struct radv_descriptor_set *set)
 {
    assert(!pool->host_memory_base);
 
+   list_del(&set->link);
    vk_descriptor_set_layout_unref(&device->vk, &set->header.layout->vk);
 
-   if (free_bo) {
-      for (int i = 0; i < pool->entry_count; ++i) {
-         if (pool->entries[i].set == set) {
-            memmove(&pool->entries[i], &pool->entries[i + 1], sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
-            --pool->entry_count;
-            break;
-         }
-      }
-   }
+   if (set->header.size)
+      util_vma_heap_free(&pool->bo_heap, (uint64_t)set->header.offset + RADV_POOL_HEAP_OFFSET, set->header.size);
+   pool->entry_count--;
 
    vk_object_base_finish(&set->header.base);
    vk_free2(&device->vk.alloc, NULL, set);
 }
@@ -578,7 +556,7 @@ radv_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, uint3
       VK_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
 
       if (set && !pool->host_memory_base)
-         radv_descriptor_set_destroy(device, pool, set, true);
+         radv_descriptor_set_destroy(device, pool, set);
    }
    return VK_SUCCESS;
 }


@@ -90,6 +90,7 @@ struct radv_descriptor_range {
 struct radv_descriptor_set_header {
    struct vk_object_base base;
    struct radv_descriptor_set_layout *layout;
+   uint32_t offset;
    uint32_t size;
    uint32_t buffer_count;
@@ -136,6 +137,6 @@ void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd
                                      const VkCopyDescriptorSet *pDescriptorCopies);
 
 void radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
-                                 struct radv_descriptor_set *set, bool free_bo);
+                                 struct radv_descriptor_set *set);
 
 #endif /* RADV_DESCRIPTOR_SET_H */