nvk: Respect VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT

This is part of VK_EXT_mutable_descriptor_type but we never did anything
with it.  Since we use local memory for descriptor sets, copying from
them means reading VRAM back through a write-combined (WC) map, which is
pretty expensive.  Using malloc() for HOST_ONLY pools should be a nice
perf boost for anything that gives us the hint.
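
For reference, this is an opt-in on the application side at pool-creation
time.  A minimal sketch of requesting a host-only pool (core Vulkan plus
VK_EXT_mutable_descriptor_type symbols; the helper name and counts are
made up).  Sets allocated from such a pool can never be bound in a command
buffer; they can only be updated and used in descriptor copies, which is
what frees the driver to back them with plain malloc()ed memory:

#include <vulkan/vulkan.h>

/* Sketch: a descriptor pool created with the HOST_ONLY hint. */
static VkResult
create_host_only_pool(VkDevice device, VkDescriptorPool *pool_out)
{
   const VkDescriptorPoolSize size = {
      .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
      .descriptorCount = 1024,
   };
   const VkDescriptorPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
      .flags = VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT,
      .maxSets = 256,
      .poolSizeCount = 1,
      .pPoolSizes = &size,
   };
   return vkCreateDescriptorPool(device, &info, NULL, pool_out);
}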

This massively improves the performance of Dragon Age: The Veilguard,
taking it from 7 FPS to 25 FPS on an RTX 4060.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12622
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33571>
commit 607686f6bf (parent b8508726f0)
Author:    Faith Ekstrand
Date:      2025-02-16 17:23:18 -06:00
Committer: Marge Bot

2 changed files with 57 additions and 20 deletions

--- a/src/nouveau/vulkan/nvk_descriptor_set.c
+++ b/src/nouveau/vulkan/nvk_descriptor_set.c

@@ -539,9 +539,14 @@ nvk_destroy_descriptor_pool(struct nvk_device *dev,
    if (pool->mem != NULL)
       nvkmd_mem_unref(pool->mem);
 
+   if (pool->host_mem != NULL)
+      vk_free2(&dev->vk.alloc, pAllocator, pool->host_mem);
+
    vk_object_free(&dev->vk, pAllocator, pool);
 }
 
+#define HOST_ONLY_ADDR 0xc0ffee0000000000ull
+
 VKAPI_ATTR VkResult VKAPI_CALL
 nvk_CreateDescriptorPool(VkDevice _device,
                          const VkDescriptorPoolCreateInfo *pCreateInfo,
@@ -603,24 +608,39 @@ nvk_CreateDescriptorPool(VkDevice _device,
     */
    mem_size += nvk_min_cbuf_alignment(&pdev->info) * pCreateInfo->maxSets;
-   if (mem_size) {
-      result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
-                                          mem_size, 0, NVKMD_MEM_LOCAL,
-                                          NVKMD_MEM_MAP_WR, &pool->mem);
-      if (result != VK_SUCCESS) {
-         nvk_destroy_descriptor_pool(dev, pAllocator, pool);
-         return result;
-      }
+   if (mem_size > 0) {
+      if (pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT) {
+         pool->host_mem = vk_zalloc2(&dev->vk.alloc, pAllocator, mem_size,
+                                     16, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+         if (pool->host_mem == NULL) {
+            nvk_destroy_descriptor_pool(dev, pAllocator, pool);
+            return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+         }
 
-      /* The BO may be larger thanks to GPU page alignment.  We may as well
-       * make that extra space available to the client.
-       */
-      assert(pool->mem->size_B >= mem_size);
-      util_vma_heap_init(&pool->heap, pool->mem->va->addr, pool->mem->size_B);
+         util_vma_heap_init(&pool->heap, HOST_ONLY_ADDR, mem_size);
+      } else {
+         result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
+                                             mem_size, 0, NVKMD_MEM_LOCAL,
+                                             NVKMD_MEM_MAP_WR, &pool->mem);
+         if (result != VK_SUCCESS) {
+            nvk_destroy_descriptor_pool(dev, pAllocator, pool);
+            return result;
+         }
+
+         /* The BO may be larger thanks to GPU page alignment.  We may as well
+          * make that extra space available to the client.
+          */
+         assert(pool->mem->size_B >= mem_size);
+         mem_size = pool->mem->size_B;
+
+         util_vma_heap_init(&pool->heap, pool->mem->va->addr, mem_size);
+      }
    } else {
       util_vma_heap_init(&pool->heap, 0, 0);
    }
 
+   pool->mem_size_B = mem_size;
+
    *pDescriptorPool = nvk_descriptor_pool_to_handle(pool);
 
    return VK_SUCCESS;
 }
@@ -640,12 +660,22 @@ nvk_descriptor_pool_alloc(struct nvk_descriptor_pool *pool,
    if (addr == 0)
       return VK_ERROR_FRAGMENTED_POOL;
 
-   assert(addr >= pool->mem->va->addr);
-   assert(addr + size <= pool->mem->va->addr + pool->mem->size_B);
-   uint64_t offset = addr - pool->mem->va->addr;
+   if (pool->host_mem != NULL) {
+      /* In this case, the address is a host address */
+      assert(addr >= HOST_ONLY_ADDR);
+      assert(addr + size <= HOST_ONLY_ADDR + pool->mem_size_B);
+      uint64_t offset = addr - HOST_ONLY_ADDR;
 
-   *addr_out = addr;
-   *map_out = pool->mem->map + offset;
+      *addr_out = addr;
+      *map_out = pool->host_mem + offset;
+   } else {
+      assert(addr >= pool->mem->va->addr);
+      assert(addr + size <= pool->mem->va->addr + pool->mem_size_B);
+      uint64_t offset = addr - pool->mem->va->addr;
+
+      *addr_out = addr;
+      *map_out = pool->mem->map + offset;
+   }
 
    return VK_SUCCESS;
 }
@@ -655,8 +685,13 @@ nvk_descriptor_pool_free(struct nvk_descriptor_pool *pool,
                          uint64_t addr, uint64_t size)
 {
    assert(size > 0);
-   assert(addr >= pool->mem->va->addr);
-   assert(addr + size <= pool->mem->va->addr + pool->mem->size_B);
+   if (pool->host_mem != NULL) {
+      assert(addr >= HOST_ONLY_ADDR);
+      assert(addr + size <= HOST_ONLY_ADDR + pool->mem_size_B);
+   } else {
+      assert(addr >= pool->mem->va->addr);
+      assert(addr + size <= pool->mem->va->addr + pool->mem_size_B);
+   }
 
    util_vma_heap_free(&pool->heap, addr, size);
 }
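
A note on the HOST_ONLY_ADDR trick above: a host-only pool has no GPU VA,
but the pool keeps using the same util_vma_heap bookkeeping for both
paths.  It simply seeds the heap with a fake base address and turns the
resulting "addresses" back into CPU pointers by subtracting that base.  A
standalone sketch of the idea (not NVK's code; all names here are made up,
with a bump allocator standing in for util_vma_heap):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define FAKE_BASE 0xc0ffee0000000000ull

struct host_pool {
   char    *mem;  /* malloc()ed backing storage */
   uint64_t size;
   uint64_t next; /* bump pointer standing in for util_vma_heap */
};

/* Hand out "addresses" in the fake range.  The numbers are never given to
 * the GPU, so any base works as long as allocations stay inside the range. */
static uint64_t
pool_alloc(struct host_pool *p, uint64_t size)
{
   if (p->next + size > FAKE_BASE + p->size)
      return 0;
   uint64_t addr = p->next;
   p->next += size;
   return addr;
}

/* Translate a fake address back into a CPU pointer. */
static void *
pool_map(struct host_pool *p, uint64_t addr)
{
   assert(addr >= FAKE_BASE && addr - FAKE_BASE < p->size);
   return p->mem + (addr - FAKE_BASE);
}

int
main(void)
{
   struct host_pool p = { .mem = malloc(4096), .size = 4096, .next = FAKE_BASE };
   uint64_t set_addr = pool_alloc(&p, 256); /* fake address */
   void *set_map = pool_map(&p, set_addr);  /* real CPU pointer */
   (void)set_map;
   free(p.mem);
   return 0;
}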

--- a/src/nouveau/vulkan/nvk_descriptor_set.h
+++ b/src/nouveau/vulkan/nvk_descriptor_set.h

@@ -24,7 +24,9 @@ struct nvk_descriptor_pool {
    struct list_head sets;
 
+   uint64_t mem_size_B;
    struct nvkmd_mem *mem;
+   void *host_mem;
 
    struct util_vma_heap heap;
 };
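
Finally, the expensive path the commit message describes is a descriptor
copy with a host-only set as the source: vkUpdateDescriptorSets() reads
the source set on the CPU, so after this change those reads hit malloc()ed
memory instead of a write-combined VRAM mapping.  A sketch (standard
Vulkan API; the helper name, bindings, and count are made up):

#include <vulkan/vulkan.h>

/* Sketch: copy 32 descriptors from binding 0 of a host-only set into a
 * normal, bindable set. */
static void
copy_from_host_only_set(VkDevice device,
                        VkDescriptorSet host_only_set,
                        VkDescriptorSet gpu_set)
{
   const VkCopyDescriptorSet copy = {
      .sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET,
      .srcSet = host_only_set,
      .srcBinding = 0,
      .dstSet = gpu_set,
      .dstBinding = 0,
      .descriptorCount = 32,
   };
   vkUpdateDescriptorSets(device, 0, NULL, 1, &copy);
}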