nvk/heap: Use an nvk_mem_arena

The only material change here is that we now allocate slightly less
memory on each growth in the non-contiguous case.  Instead of allocating
a power of two plus the over-allocation so that the total usable space
is a power of two, we now allocate exactly a power of two and just let
the over-allocation burn space.
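
In rough arithmetic terms, the sizing change described above looks like
this (a minimal sketch only; pow2_size_B and overalloc_B are placeholder
names for the power-of-two growth size and the heap's over-allocation,
not the real fields):

    #include <stdint.h>

    /* Before: allocate extra so the usable space stays a power of two. */
    static uint64_t old_grow(uint64_t pow2_size_B, uint64_t overalloc_B,
                             uint64_t *usable_B)
    {
       *usable_B = pow2_size_B;                /* usable space is a power of two */
       return pow2_size_B + overalloc_B;       /* bytes actually allocated */
    }

    /* Now: allocate exactly a power of two and let the over-allocation
     * burn space out of it.
     */
    static uint64_t new_grow(uint64_t pow2_size_B, uint64_t overalloc_B,
                             uint64_t *usable_B)
    {
       *usable_B = pow2_size_B - overalloc_B;  /* over-allocation burns space */
       return pow2_size_B;                     /* bytes actually allocated */
    }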

This also reworks the internal vma_heap to use actual GPU addresses
instead of packed index+offset values.  This costs us a little extra
work looking up the map in alloc() but we save it again in free(), so
there's no real loss here.
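
For reference, the packed encoding being removed looks roughly like this
(mirroring the deleted encode_vma()/vma_mem_idx()/vma_mem_offset()
helpers in the diff below; a sketch, not the exact code):

    #include <stdint.h>

    /* Old scheme: the value stored in the util_vma_heap packs the BO
     * index (biased by 1 so 0 stays invalid) into the top 16 bits and
     * the byte offset within that BO into the low 48 bits.
     */
    static uint64_t encode_vma(uint32_t mem_idx, uint64_t mem_offset)
    {
       return ((uint64_t)(mem_idx + 1) << 48) | mem_offset;
    }

    static uint32_t vma_mem_idx(uint64_t vma)
    {
       return (uint32_t)(vma >> 48) - 1;
    }

    static uint64_t vma_mem_offset(uint64_t vma)
    {
       return vma & ((1ull << 48) - 1);
    }

    /* New scheme: the util_vma_heap stores real GPU virtual addresses,
     * so free() passes the address straight through; only a mapped
     * alloc() still has to translate the address into a CPU pointer via
     * the arena.
     */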

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35327>
Authored by Faith Ekstrand on 2025-06-03 15:03:03 -04:00; committed by Marge Bot
parent 417e85f617
commit 9e52e296f7
2 changed files with 35 additions and 172 deletions

src/nouveau/vulkan/nvk_heap.c

@@ -19,134 +19,42 @@ nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
enum nvkmd_mem_map_flags map_flags,
uint32_t overalloc, bool contiguous)
{
VkResult result;
VkResult result = nvk_mem_arena_init(dev, &heap->arena, mem_flags, map_flags,
true, NVK_MEM_ARENA_MAX_SIZE);
if (result != VK_SUCCESS)
return result;
memset(heap, 0, sizeof(*heap));
heap->mem_flags = mem_flags;
if (map_flags)
heap->mem_flags |= NVKMD_MEM_CAN_MAP;
heap->map_flags = map_flags;
assert(overalloc < NVK_MEM_ARENA_MIN_SIZE);
heap->overalloc = overalloc;
if (contiguous) {
result = nvkmd_dev_alloc_va(dev->nvkmd, &dev->vk.base,
0 /* va_flags */, 0 /* pte_kind */,
NVK_HEAP_MAX_SIZE, 0 /* align_B */,
0 /* fixed_addr */,
&heap->contig_va);
if (result != VK_SUCCESS)
return result;
}
simple_mtx_init(&heap->mutex, mtx_plain);
util_vma_heap_init(&heap->heap, 0, 0);
heap->total_size = 0;
heap->mem_count = 0;
return VK_SUCCESS;
}
void
nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
{
/* Freeing the VA will unbind all the memory */
if (heap->contig_va)
nvkmd_va_free(heap->contig_va);
for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++)
nvkmd_mem_unref(heap->mem[mem_idx].mem);
util_vma_heap_finish(&heap->heap);
simple_mtx_destroy(&heap->mutex);
}
static uint64_t
encode_vma(uint32_t mem_idx, uint64_t mem_offset)
{
assert(mem_idx < UINT16_MAX - 1);
assert(mem_offset < (1ull << 48));
return ((uint64_t)(mem_idx + 1) << 48) | mem_offset;
}
static uint32_t
vma_mem_idx(uint64_t offset)
{
offset = offset >> 48;
assert(offset > 0);
return offset - 1;
}
static uint64_t
vma_mem_offset(uint64_t offset)
{
return offset & BITFIELD64_MASK(48);
nvk_mem_arena_finish(dev, &heap->arena);
}
static VkResult
nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
{
const bool is_first_grow = nvk_mem_arena_size_B(&heap->arena) == 0;
VkResult result;
if (heap->mem_count >= NVK_HEAP_MAX_BO_COUNT) {
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Heap has already hit its maximum size");
}
/* First two BOs are MIN_SIZE, double after that */
const uint64_t new_mem_size =
NVK_HEAP_MIN_SIZE << (MAX2(heap->mem_count, 1) - 1);
struct nvkmd_mem *mem;
if (heap->map_flags) {
result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
new_mem_size, 0, heap->mem_flags,
heap->map_flags, &mem);
} else {
result = nvkmd_dev_alloc_mem(dev->nvkmd, &dev->vk.base,
new_mem_size, 0, heap->mem_flags, &mem);
}
uint64_t addr, mem_size_B;
result = nvk_mem_arena_grow_locked(dev, &heap->arena, &addr, &mem_size_B);
if (result != VK_SUCCESS)
return result;
assert(mem->size_B == new_mem_size);
uint64_t addr;
if (heap->contig_va != NULL) {
result = nvkmd_va_bind_mem(heap->contig_va, &dev->vk.base,
heap->total_size, mem, 0, new_mem_size);
if (result != VK_SUCCESS) {
nvkmd_mem_unref(mem);
return result;
}
addr = heap->contig_va->addr + heap->total_size;
/* For contiguous heaps, we can now free the padding from the previous
* BO because the BO we just added will provide the needed padding. For
* non-contiguous heaps, we have to leave each BO padded individually.
*/
if (heap->mem_count > 0) {
struct nvkmd_mem *prev_mem = heap->mem[heap->mem_count - 1].mem;
assert(heap->overalloc < prev_mem->size_B);
const uint64_t pad_vma =
encode_vma(heap->mem_count - 1, prev_mem->size_B - heap->overalloc);
util_vma_heap_free(&heap->heap, pad_vma, heap->overalloc);
}
if (nvk_mem_arena_is_contiguous(&heap->arena) && !is_first_grow) {
util_vma_heap_free(&heap->heap, addr - heap->overalloc, mem_size_B);
} else {
addr = mem->va->addr;
util_vma_heap_free(&heap->heap, addr, mem_size_B - heap->overalloc);
}
uint64_t vma = encode_vma(heap->mem_count, 0);
assert(heap->overalloc < new_mem_size);
util_vma_heap_free(&heap->heap, vma, new_mem_size - heap->overalloc);
heap->mem[heap->mem_count++] = (struct nvk_heap_mem) {
.mem = mem,
.addr = addr,
};
heap->total_size += new_mem_size;
return VK_SUCCESS;
}
@@ -155,20 +63,20 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
uint64_t size, uint32_t alignment,
uint64_t *addr_out, void **map_out)
{
/* Make sure we follow the restrictions in nvk_mem_arena_map(). */
if (map_out != NULL && nvk_mem_arena_is_mapped(&heap->arena)) {
assert(size <= NVK_MEM_ARENA_MIN_SIZE);
alignment = MAX2(alignment, util_next_power_of_two(size));
}
while (1) {
uint64_t vma = util_vma_heap_alloc(&heap->heap, size, alignment);
if (vma != 0) {
uint32_t mem_idx = vma_mem_idx(vma);
uint64_t mem_offset = vma_mem_offset(vma);
uint64_t addr = util_vma_heap_alloc(&heap->heap, size, alignment);
if (addr != 0) {
*addr_out = addr;
assert(mem_idx < heap->mem_count);
assert(heap->mem[mem_idx].mem != NULL);
assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);
*addr_out = heap->mem[mem_idx].addr + mem_offset;
if (map_out != NULL) {
if (heap->mem[mem_idx].mem->map != NULL)
*map_out = (char *)heap->mem[mem_idx].mem->map + mem_offset;
if (nvk_mem_arena_is_mapped(&heap->arena))
*map_out = nvk_mem_arena_map(&heap->arena, addr, size);
else
*map_out = NULL;
}
@@ -187,22 +95,7 @@ nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
uint64_t addr, uint64_t size)
{
assert(addr + size > addr);
for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++) {
if (addr < heap->mem[mem_idx].addr)
continue;
uint64_t mem_offset = addr - heap->mem[mem_idx].addr;
if (mem_offset >= heap->mem[mem_idx].mem->size_B)
continue;
assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);
uint64_t vma = encode_vma(mem_idx, mem_offset);
util_vma_heap_free(&heap->heap, vma, size);
return;
}
assert(!"Failed to find heap BO");
util_vma_heap_free(&heap->heap, addr, size);
}
VkResult
@@ -210,10 +103,10 @@ nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
uint64_t size, uint32_t alignment,
uint64_t *addr_out, void **map_out)
{
simple_mtx_lock(&heap->mutex);
simple_mtx_lock(&heap->arena.mutex);
VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
addr_out, map_out);
simple_mtx_unlock(&heap->mutex);
simple_mtx_unlock(&heap->arena.mutex);
return result;
}
@@ -223,18 +116,14 @@ nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
const void *data, size_t size, uint32_t alignment,
uint64_t *addr_out)
{
simple_mtx_lock(&heap->mutex);
void *map = NULL;
VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
addr_out, &map);
simple_mtx_unlock(&heap->mutex);
VkResult result;
result = nvk_heap_alloc(dev, heap, size, alignment, addr_out, NULL);
if (result != VK_SUCCESS)
return result;
if (map != NULL && (heap->map_flags & NVKMD_MEM_MAP_WR)) {
/* If we have a map, copy directly with memcpy */
memcpy(map, data, size);
if (heap->arena.map_flags & NVKMD_MEM_MAP_WR) {
nvk_mem_arena_copy_to_gpu(&heap->arena, *addr_out, data, size);
} else {
/* Otherwise, kick off an upload with the upload queue.
*
@@ -269,7 +158,7 @@ void
nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
uint64_t addr, uint64_t size)
{
simple_mtx_lock(&heap->mutex);
simple_mtx_lock(&heap->arena.mutex);
nvk_heap_free_locked(dev, heap, addr, size);
simple_mtx_unlock(&heap->mutex);
simple_mtx_unlock(&heap->arena.mutex);
}

src/nouveau/vulkan/nvk_heap.h

@@ -5,41 +5,16 @@
#ifndef NVK_HEAP_H
#define NVK_HEAP_H 1
#include "nvk_private.h"
#include "nvk_mem_arena.h"
#include "util/simple_mtx.h"
#include "util/vma.h"
#include "nvkmd/nvkmd.h"
struct nvk_device;
#define NVK_HEAP_MIN_SIZE_LOG2 16
#define NVK_HEAP_MAX_SIZE_LOG2 32
#define NVK_HEAP_MIN_SIZE (1ull << NVK_HEAP_MIN_SIZE_LOG2)
#define NVK_HEAP_MAX_SIZE (1ull << NVK_HEAP_MAX_SIZE_LOG2)
#define NVK_HEAP_MAX_BO_COUNT (NVK_HEAP_MAX_SIZE_LOG2 - \
NVK_HEAP_MIN_SIZE_LOG2 + 1)
struct nvk_heap_mem {
struct nvkmd_mem *mem;
uint64_t addr;
};
struct nvk_heap {
enum nvkmd_mem_flags mem_flags;
enum nvkmd_mem_map_flags map_flags;
struct nvk_mem_arena arena;
uint32_t overalloc;
simple_mtx_t mutex;
struct util_vma_heap heap;
/* VA for contiguous heaps, NULL otherwise */
struct nvkmd_va *contig_va;
uint64_t total_size;
uint32_t mem_count;
struct nvk_heap_mem mem[NVK_HEAP_MAX_BO_COUNT];
};
VkResult nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
@@ -63,8 +38,7 @@ void nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
static inline uint64_t
nvk_heap_contiguous_base_address(struct nvk_heap *heap)
{
assert(heap->contig_va != NULL);
return heap->contig_va->addr;
return nvk_contiguous_mem_arena_base_address(&heap->arena);
}
#endif /* define NVK_HEAP_H */