Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-24 17:30:12 +01:00)
nvk/heap: Use an nvk_mem_arena
The only material change here is that we now allocate slightly less memory each time in the non-contiguous case. Instead of allocating a power of two plus the over-allocation so that the total usable space is a power of two, we now allocate a power of two and just let the over-allocation burn space.

This also reworks the internal vma_heap to use actual addresses instead of the index+offset packed values. This costs a tiny bit more to look up the map in alloc(), but we save it again on free, so there's no real loss here.

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35327>
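For concreteness, here is a small standalone sketch of the sizing change described above. This is not code from the patch; the constants (MIN_SIZE_LOG2, OVERALLOC) and the loop are illustrative stand-ins for the heap's real parameters, under the "first two BOs are MIN_SIZE, double after that" growth rule visible in the diff below:

   #include <assert.h>
   #include <inttypes.h>
   #include <stdint.h>
   #include <stdio.h>

   /* Illustrative stand-ins for the heap's parameters, not the driver's values. */
   #define MIN_SIZE_LOG2 16
   #define MIN_SIZE      (1ull << MIN_SIZE_LOG2)
   #define OVERALLOC     4096ull  /* padding required past the end of each BO */

   int
   main(void)
   {
      for (uint32_t bo_idx = 0; bo_idx < 4; bo_idx++) {
         /* First two BOs are MIN_SIZE, doubling after that. */
         const uint64_t pow2 = MIN_SIZE << ((bo_idx > 0 ? bo_idx : 1) - 1);

         /* Old scheme: allocate pow2 + OVERALLOC so the usable space is a
          * full power of two.
          */
         const uint64_t old_alloc = pow2 + OVERALLOC;
         const uint64_t old_usable = pow2;

         /* New scheme: allocate exactly pow2 and let the over-allocation
          * burn usable space at the end of the BO instead.
          */
         const uint64_t new_alloc = pow2;
         const uint64_t new_usable = pow2 - OVERALLOC;

         assert(new_alloc < old_alloc);
         printf("BO %" PRIu32 ": old %" PRIu64 " usable of %" PRIu64
                ", new %" PRIu64 " usable of %" PRIu64 "\n",
                bo_idx, old_usable, old_alloc, new_usable, new_alloc);
      }
      return 0;
   }

The vma_heap rework is then just the observation that the values handed to util_vma_heap no longer need to encode a (BO index, offset) pair, as the encode_vma()/vma_mem_idx()/vma_mem_offset() helpers in the old code below did; feeding real GPU virtual addresses into the vma_heap lets free() hand the address straight back without searching for the owning BO.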
Parent: 417e85f617
Commit: 9e52e296f7
2 changed files with 35 additions and 172 deletions
@@ -19,134 +19,42 @@ nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
              enum nvkmd_mem_map_flags map_flags,
              uint32_t overalloc, bool contiguous)
{
   VkResult result;
   VkResult result = nvk_mem_arena_init(dev, &heap->arena, mem_flags, map_flags,
                                        true, NVK_MEM_ARENA_MAX_SIZE);
   if (result != VK_SUCCESS)
      return result;

   memset(heap, 0, sizeof(*heap));

   heap->mem_flags = mem_flags;
   if (map_flags)
      heap->mem_flags |= NVKMD_MEM_CAN_MAP;
   heap->map_flags = map_flags;
   assert(overalloc < NVK_MEM_ARENA_MIN_SIZE);
   heap->overalloc = overalloc;

   if (contiguous) {
      result = nvkmd_dev_alloc_va(dev->nvkmd, &dev->vk.base,
                                  0 /* va_flags */, 0 /* pte_kind */,
                                  NVK_HEAP_MAX_SIZE, 0 /* align_B */,
                                  0 /* fixed_addr */,
                                  &heap->contig_va);
      if (result != VK_SUCCESS)
         return result;
   }

   simple_mtx_init(&heap->mutex, mtx_plain);
   util_vma_heap_init(&heap->heap, 0, 0);

   heap->total_size = 0;
   heap->mem_count = 0;

   return VK_SUCCESS;
}

void
nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
{
   /* Freeing the VA will unbind all the memory */
   if (heap->contig_va)
      nvkmd_va_free(heap->contig_va);

   for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++)
      nvkmd_mem_unref(heap->mem[mem_idx].mem);

   util_vma_heap_finish(&heap->heap);
   simple_mtx_destroy(&heap->mutex);
}

static uint64_t
encode_vma(uint32_t mem_idx, uint64_t mem_offset)
{
   assert(mem_idx < UINT16_MAX - 1);
   assert(mem_offset < (1ull << 48));
   return ((uint64_t)(mem_idx + 1) << 48) | mem_offset;
}

static uint32_t
vma_mem_idx(uint64_t offset)
{
   offset = offset >> 48;
   assert(offset > 0);
   return offset - 1;
}

static uint64_t
vma_mem_offset(uint64_t offset)
{
   return offset & BITFIELD64_MASK(48);
   nvk_mem_arena_finish(dev, &heap->arena);
}

static VkResult
nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
{
   const bool is_first_grow = nvk_mem_arena_size_B(&heap->arena) == 0;
   VkResult result;

   if (heap->mem_count >= NVK_HEAP_MAX_BO_COUNT) {
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Heap has already hit its maximum size");
   }

   /* First two BOs are MIN_SIZE, double after that */
   const uint64_t new_mem_size =
      NVK_HEAP_MIN_SIZE << (MAX2(heap->mem_count, 1) - 1);

   struct nvkmd_mem *mem;
   if (heap->map_flags) {
      result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                          new_mem_size, 0, heap->mem_flags,
                                          heap->map_flags, &mem);
   } else {
      result = nvkmd_dev_alloc_mem(dev->nvkmd, &dev->vk.base,
                                   new_mem_size, 0, heap->mem_flags, &mem);
   }
   uint64_t addr, mem_size_B;
   result = nvk_mem_arena_grow_locked(dev, &heap->arena, &addr, &mem_size_B);
   if (result != VK_SUCCESS)
      return result;

   assert(mem->size_B == new_mem_size);

   uint64_t addr;
   if (heap->contig_va != NULL) {
      result = nvkmd_va_bind_mem(heap->contig_va, &dev->vk.base,
                                 heap->total_size, mem, 0, new_mem_size);
      if (result != VK_SUCCESS) {
         nvkmd_mem_unref(mem);
         return result;
      }
      addr = heap->contig_va->addr + heap->total_size;

      /* For contiguous heaps, we can now free the padding from the previous
       * BO because the BO we just added will provide the needed padding. For
       * non-contiguous heaps, we have to leave each BO padded individually.
       */
      if (heap->mem_count > 0) {
         struct nvkmd_mem *prev_mem = heap->mem[heap->mem_count - 1].mem;
         assert(heap->overalloc < prev_mem->size_B);
         const uint64_t pad_vma =
            encode_vma(heap->mem_count - 1, prev_mem->size_B - heap->overalloc);
         util_vma_heap_free(&heap->heap, pad_vma, heap->overalloc);
      }
   if (nvk_mem_arena_is_contiguous(&heap->arena) && !is_first_grow) {
      util_vma_heap_free(&heap->heap, addr - heap->overalloc, mem_size_B);
   } else {
      addr = mem->va->addr;
      util_vma_heap_free(&heap->heap, addr, mem_size_B - heap->overalloc);
   }

   uint64_t vma = encode_vma(heap->mem_count, 0);
   assert(heap->overalloc < new_mem_size);
   util_vma_heap_free(&heap->heap, vma, new_mem_size - heap->overalloc);

   heap->mem[heap->mem_count++] = (struct nvk_heap_mem) {
      .mem = mem,
      .addr = addr,
   };
   heap->total_size += new_mem_size;

   return VK_SUCCESS;
}
@@ -155,20 +63,20 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
                      uint64_t size, uint32_t alignment,
                      uint64_t *addr_out, void **map_out)
{
   /* Make sure we follow the restrictions in nvk_mem_arena_map(). */
   if (map_out != NULL && nvk_mem_arena_is_mapped(&heap->arena)) {
      assert(size <= NVK_MEM_ARENA_MIN_SIZE);
      alignment = MAX2(alignment, util_next_power_of_two(size));
   }

   while (1) {
      uint64_t vma = util_vma_heap_alloc(&heap->heap, size, alignment);
      if (vma != 0) {
         uint32_t mem_idx = vma_mem_idx(vma);
         uint64_t mem_offset = vma_mem_offset(vma);
      uint64_t addr = util_vma_heap_alloc(&heap->heap, size, alignment);
      if (addr != 0) {
         *addr_out = addr;

         assert(mem_idx < heap->mem_count);
         assert(heap->mem[mem_idx].mem != NULL);
         assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);

         *addr_out = heap->mem[mem_idx].addr + mem_offset;
         if (map_out != NULL) {
            if (heap->mem[mem_idx].mem->map != NULL)
               *map_out = (char *)heap->mem[mem_idx].mem->map + mem_offset;
            if (nvk_mem_arena_is_mapped(&heap->arena))
               *map_out = nvk_mem_arena_map(&heap->arena, addr, size);
            else
               *map_out = NULL;
         }
@@ -187,22 +95,7 @@ nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
                     uint64_t addr, uint64_t size)
{
   assert(addr + size > addr);

   for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++) {
      if (addr < heap->mem[mem_idx].addr)
         continue;

      uint64_t mem_offset = addr - heap->mem[mem_idx].addr;
      if (mem_offset >= heap->mem[mem_idx].mem->size_B)
         continue;

      assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);
      uint64_t vma = encode_vma(mem_idx, mem_offset);

      util_vma_heap_free(&heap->heap, vma, size);
      return;
   }
   assert(!"Failed to find heap BO");
   util_vma_heap_free(&heap->heap, addr, size);
}

VkResult
@@ -210,10 +103,10 @@ nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
               uint64_t size, uint32_t alignment,
               uint64_t *addr_out, void **map_out)
{
   simple_mtx_lock(&heap->mutex);
   simple_mtx_lock(&heap->arena.mutex);
   VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                           addr_out, map_out);
   simple_mtx_unlock(&heap->mutex);
   simple_mtx_unlock(&heap->arena.mutex);

   return result;
}
@@ -223,18 +116,14 @@ nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
                const void *data, size_t size, uint32_t alignment,
                uint64_t *addr_out)
{
   simple_mtx_lock(&heap->mutex);
   void *map = NULL;
   VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                           addr_out, &map);
   simple_mtx_unlock(&heap->mutex);
   VkResult result;

   result = nvk_heap_alloc(dev, heap, size, alignment, addr_out, NULL);
   if (result != VK_SUCCESS)
      return result;

   if (map != NULL && (heap->map_flags & NVKMD_MEM_MAP_WR)) {
      /* If we have a map, copy directly with memcpy */
      memcpy(map, data, size);
   if (heap->arena.map_flags & NVKMD_MEM_MAP_WR) {
      nvk_mem_arena_copy_to_gpu(&heap->arena, *addr_out, data, size);
   } else {
      /* Otherwise, kick off an upload with the upload queue.
       *
@@ -269,7 +158,7 @@ void
nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
              uint64_t addr, uint64_t size)
{
   simple_mtx_lock(&heap->mutex);
   simple_mtx_lock(&heap->arena.mutex);
   nvk_heap_free_locked(dev, heap, addr, size);
   simple_mtx_unlock(&heap->mutex);
   simple_mtx_unlock(&heap->arena.mutex);
}
@@ -5,41 +5,16 @@
#ifndef NVK_HEAP_H
#define NVK_HEAP_H 1

#include "nvk_private.h"
#include "nvk_mem_arena.h"

#include "util/simple_mtx.h"
#include "util/vma.h"
#include "nvkmd/nvkmd.h"

struct nvk_device;

#define NVK_HEAP_MIN_SIZE_LOG2 16
#define NVK_HEAP_MAX_SIZE_LOG2 32
#define NVK_HEAP_MIN_SIZE (1ull << NVK_HEAP_MIN_SIZE_LOG2)
#define NVK_HEAP_MAX_SIZE (1ull << NVK_HEAP_MAX_SIZE_LOG2)
#define NVK_HEAP_MAX_BO_COUNT (NVK_HEAP_MAX_SIZE_LOG2 - \
                               NVK_HEAP_MIN_SIZE_LOG2 + 1)

struct nvk_heap_mem {
   struct nvkmd_mem *mem;
   uint64_t addr;
};

struct nvk_heap {
   enum nvkmd_mem_flags mem_flags;
   enum nvkmd_mem_map_flags map_flags;
   struct nvk_mem_arena arena;
   uint32_t overalloc;

   simple_mtx_t mutex;
   struct util_vma_heap heap;

   /* VA for contiguous heaps, NULL otherwise */
   struct nvkmd_va *contig_va;

   uint64_t total_size;

   uint32_t mem_count;
   struct nvk_heap_mem mem[NVK_HEAP_MAX_BO_COUNT];
};

VkResult nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
@@ -63,8 +38,7 @@ void nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
static inline uint64_t
nvk_heap_contiguous_base_address(struct nvk_heap *heap)
{
   assert(heap->contig_va != NULL);
   return heap->contig_va->addr;
   return nvk_contiguous_mem_arena_base_address(&heap->arena);
}

#endif /* define NVK_HEAP_H */
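For orientation, here is a rough caller-side sketch using only the declarations shown in nvk_heap.h above. It is not from the patch; the caller name, the data being uploaded, and the 256-byte alignment are hypothetical placeholders:

   /* Hypothetical caller: allocate heap space for a blob and fill it.
    * nvk_heap_upload() allocates from the heap and copies the data, either
    * through a CPU mapping or by kicking off an upload on the upload queue.
    */
   static VkResult
   upload_blob(struct nvk_device *dev, struct nvk_heap *heap,
               const void *blob, size_t blob_size, uint64_t *addr_out)
   {
      return nvk_heap_upload(dev, heap, blob, blob_size,
                             256 /* alignment, placeholder */, addr_out);
   }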