nvk: Use nvkmd_mem for shader and event heaps

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30033>
Faith Ekstrand 2024-07-03 09:47:41 -05:00 committed by Marge Bot
parent 17623bc8a9
commit d072bea5b7
4 changed files with 97 additions and 101 deletions


@@ -192,11 +192,11 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
     * Also, the I-cache pre-fetches and we don't really know by how much.
     * Over-allocating shader BOs by 4K ensures we don't run past.
     */
-   enum nouveau_ws_bo_map_flags shader_map_flags = 0;
+   enum nvkmd_mem_map_flags shader_map_flags = 0;
    if (pdev->info.bar_size_B >= pdev->info.vram_size_B)
-      shader_map_flags = NOUVEAU_WS_BO_WR;
+      shader_map_flags = NVKMD_MEM_MAP_WR;
    result = nvk_heap_init(dev, &dev->shader_heap,
-                          NOUVEAU_WS_BO_LOCAL | NOUVEAU_WS_BO_NO_SHARE,
+                          NVKMD_MEM_LOCAL | NVKMD_MEM_NO_SHARE,
                           shader_map_flags,
                           4096 /* overalloc */,
                           pdev->info.cls_eng3d < VOLTA_A);
@@ -204,8 +204,8 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
       goto fail_samplers;
    result = nvk_heap_init(dev, &dev->event_heap,
-                          NOUVEAU_WS_BO_LOCAL | NOUVEAU_WS_BO_NO_SHARE,
-                          NOUVEAU_WS_BO_WR,
+                          NVKMD_MEM_LOCAL | NVKMD_MEM_NO_SHARE,
+                          NVKMD_MEM_MAP_WR,
                           0 /* overalloc */, false /* contiguous */);
    if (result != VK_SUCCESS)
       goto fail_shader_heap;
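
As the comment above explains, the shader heap is over-allocated by 4 KiB because the instruction cache prefetches past the end of a shader by an unknown amount, and the heap is only mapped for CPU writes when the BAR is at least as large as VRAM. Below is a minimal, standalone sketch of the overalloc arithmetic; the 64 KiB backing size is hypothetical, only the 4096-byte pad comes from the call above.

/* Sketch only: how a 4 KiB overalloc keeps I-cache prefetch inside mapped
 * memory.  The backing size is hypothetical; NVK passes overalloc = 4096
 * to nvk_heap_init() for the shader heap. */
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
   const uint64_t overalloc_B = 4096;        /* pad kept at the end */
   const uint64_t mem_size_B = 64 * 1024;    /* hypothetical backing size */

   /* Only mem_size_B - overalloc_B bytes are handed to the sub-allocator,
    * so a shader placed at the very end still has 4 KiB of mapped memory
    * after it for the I-cache to prefetch into. */
   const uint64_t usable_B = mem_size_B - overalloc_B;
   assert(usable_B + overalloc_B == mem_size_B);
   printf("usable: %" PRIu64 " of %" PRIu64 " bytes\n", usable_B, mem_size_B);
   return 0;
}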


@@ -15,34 +15,35 @@
 VkResult
 nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
-              enum nouveau_ws_bo_flags bo_flags,
-              enum nouveau_ws_bo_map_flags map_flags,
+              enum nvkmd_mem_flags mem_flags,
+              enum nvkmd_mem_map_flags map_flags,
               uint32_t overalloc, bool contiguous)
 {
+   VkResult result;
    memset(heap, 0, sizeof(*heap));
-   heap->bo_flags = bo_flags;
+   heap->mem_flags = mem_flags;
    if (map_flags)
-      heap->bo_flags |= NOUVEAU_WS_BO_MAP;
+      heap->mem_flags |= NVKMD_MEM_CAN_MAP;
    heap->map_flags = map_flags;
    heap->overalloc = overalloc;
    if (contiguous) {
-      heap->base_addr = nouveau_ws_alloc_vma(dev->ws_dev, 0,
-                                             NVK_HEAP_MAX_SIZE,
-                                             0, false /* bda */,
-                                             false /* sparse */);
-      if (heap->base_addr == 0) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Failed to allocate VMA for heap");
-      }
+      result = nvkmd_dev_alloc_va(dev->nvkmd, &dev->vk.base,
+                                  0 /* va_flags */, 0 /* pte_kind */,
+                                  NVK_HEAP_MAX_SIZE, 0 /* align_B */,
+                                  0 /* fixed_addr */,
+                                  &heap->contig_va);
+      if (result != VK_SUCCESS)
+         return result;
    }
    simple_mtx_init(&heap->mutex, mtx_plain);
    util_vma_heap_init(&heap->heap, 0, 0);
    heap->total_size = 0;
-   heap->bo_count = 0;
+   heap->mem_count = 0;
    return VK_SUCCESS;
 }
@@ -50,37 +51,27 @@ nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
 void
 nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
 {
-   for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
-      if (heap->base_addr != 0) {
-         nouveau_ws_bo_unbind_vma(dev->ws_dev, heap->bos[bo_idx].addr,
-                                  heap->bos[bo_idx].bo->size);
-      }
-      if (heap->map_flags) {
-         assert(heap->bos[bo_idx].map);
-         nouveau_ws_bo_unmap(heap->bos[bo_idx].bo, heap->bos[bo_idx].map);
-      }
-      nouveau_ws_bo_destroy(heap->bos[bo_idx].bo);
-   }
+   /* Freeing the VA will unbind all the memory */
+   if (heap->contig_va)
+      nvkmd_va_free(heap->contig_va);
+   for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++)
+      nvkmd_mem_unref(heap->mem[mem_idx].mem);
    util_vma_heap_finish(&heap->heap);
    simple_mtx_destroy(&heap->mutex);
-   if (heap->base_addr != 0) {
-      nouveau_ws_free_vma(dev->ws_dev, heap->base_addr, NVK_HEAP_MAX_SIZE,
-                          false /* bda */, false /* sparse */);
-   }
 }
 static uint64_t
-encode_vma(uint32_t bo_idx, uint64_t bo_offset)
+encode_vma(uint32_t mem_idx, uint64_t mem_offset)
 {
-   assert(bo_idx < UINT16_MAX - 1);
-   assert(bo_offset < (1ull << 48));
-   return ((uint64_t)(bo_idx + 1) << 48) | bo_offset;
+   assert(mem_idx < UINT16_MAX - 1);
+   assert(mem_offset < (1ull << 48));
+   return ((uint64_t)(mem_idx + 1) << 48) | mem_offset;
 }
 static uint32_t
-vma_bo_idx(uint64_t offset)
+vma_mem_idx(uint64_t offset)
 {
    offset = offset >> 48;
    assert(offset > 0);
@@ -88,7 +79,7 @@ vma_bo_idx(uint64_t offset)
 }
 static uint64_t
-vma_bo_offset(uint64_t offset)
+vma_mem_offset(uint64_t offset)
 {
    return offset & BITFIELD64_MASK(48);
 }
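
The heap's util_vma_heap does not track GPU addresses directly: encode_vma() packs the index of the backing memory (biased by 1 so that 0 never looks like a valid allocation) into the top 16 bits and the byte offset within it into the low 48 bits, and vma_mem_idx()/vma_mem_offset() undo that. A standalone round-trip sketch of the same packing, for illustration only:

/* Illustrative re-statement of the nvk_heap VMA packing: top 16 bits hold
 * mem_idx + 1, low 48 bits hold the byte offset within that memory. */
#include <assert.h>
#include <stdint.h>

static uint64_t encode_vma(uint32_t mem_idx, uint64_t mem_offset)
{
   assert(mem_idx < UINT16_MAX - 1);
   assert(mem_offset < (1ull << 48));
   return ((uint64_t)(mem_idx + 1) << 48) | mem_offset;
}

static uint32_t vma_mem_idx(uint64_t vma)
{
   vma >>= 48;
   assert(vma > 0);
   return (uint32_t)(vma - 1);
}

static uint64_t vma_mem_offset(uint64_t vma)
{
   return vma & ((1ull << 48) - 1);   /* same as BITFIELD64_MASK(48) */
}

int main(void)
{
   const uint64_t vma = encode_vma(3, 0x1234);
   assert(vma_mem_idx(vma) == 3);
   assert(vma_mem_offset(vma) == 0x1234);
   assert(encode_vma(0, 0) != 0);   /* bias keeps 0 an invalid VMA */
   return 0;
}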
@@ -96,60 +87,65 @@ vma_bo_offset(uint64_t offset)
 static VkResult
 nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
 {
-   if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
+   VkResult result;
+   if (heap->mem_count >= NVK_HEAP_MAX_BO_COUNT) {
       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                        "Heap has already hit its maximum size");
    }
    /* First two BOs are MIN_SIZE, double after that */
-   const uint64_t new_bo_size =
-      NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
+   const uint64_t new_mem_size =
+      NVK_HEAP_MIN_SIZE << (MAX2(heap->mem_count, 1) - 1);
-   struct nouveau_ws_bo *bo =
-      nouveau_ws_bo_new(dev->ws_dev, new_bo_size, 0, heap->bo_flags);
-   if (bo == NULL) {
-      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                       "Failed to allocate a heap BO: %m");
-   }
-   void *map = NULL;
+   struct nvkmd_mem *mem;
    if (heap->map_flags) {
-      map = nouveau_ws_bo_map(bo, heap->map_flags, NULL);
-      if (map == NULL) {
-         nouveau_ws_bo_destroy(bo);
-         return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
-                          "Failed to map a heap BO: %m");
-      }
+      result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
+                                          new_mem_size, 0, heap->mem_flags,
+                                          heap->map_flags, &mem);
+   } else {
+      result = nvkmd_dev_alloc_mem(dev->nvkmd, &dev->vk.base,
+                                   new_mem_size, 0, heap->mem_flags, &mem);
    }
+   if (result != VK_SUCCESS)
+      return result;
-   uint64_t addr = bo->offset;
-   if (heap->base_addr != 0) {
-      addr = heap->base_addr + heap->total_size;
-      nouveau_ws_bo_bind_vma(dev->ws_dev, bo, addr, new_bo_size, 0, 0);
+   assert(mem->size_B == new_mem_size);
+   uint64_t addr;
+   if (heap->contig_va != NULL) {
+      result = nvkmd_va_bind_mem(heap->contig_va, &dev->vk.base,
+                                 heap->total_size, mem, 0, new_mem_size);
+      if (result != VK_SUCCESS) {
+         nvkmd_mem_unref(mem);
+         return result;
+      }
+      addr = heap->contig_va->addr + heap->total_size;
       /* For contiguous heaps, we can now free the padding from the previous
        * BO because the BO we just added will provide the needed padding. For
        * non-contiguous heaps, we have to leave each BO padded individually.
        */
-      if (heap->bo_count > 0) {
-         struct nouveau_ws_bo *prev_bo = heap->bos[heap->bo_count - 1].bo;
-         assert(heap->overalloc < prev_bo->size);
+      if (heap->mem_count > 0) {
+         struct nvkmd_mem *prev_mem = heap->mem[heap->mem_count - 1].mem;
+         assert(heap->overalloc < prev_mem->size_B);
          const uint64_t pad_vma =
-            encode_vma(heap->bo_count - 1, prev_bo->size - heap->overalloc);
+            encode_vma(heap->mem_count - 1, prev_mem->size_B - heap->overalloc);
          util_vma_heap_free(&heap->heap, pad_vma, heap->overalloc);
       }
+   } else {
+      addr = mem->va->addr;
    }
-   uint64_t vma = encode_vma(heap->bo_count, 0);
-   assert(heap->overalloc < new_bo_size);
-   util_vma_heap_free(&heap->heap, vma, new_bo_size - heap->overalloc);
+   uint64_t vma = encode_vma(heap->mem_count, 0);
+   assert(heap->overalloc < new_mem_size);
+   util_vma_heap_free(&heap->heap, vma, new_mem_size - heap->overalloc);
-   heap->bos[heap->bo_count++] = (struct nvk_heap_bo) {
-      .bo = bo,
-      .map = map,
+   heap->mem[heap->mem_count++] = (struct nvk_heap_mem) {
+      .mem = mem,
       .addr = addr,
    };
-   heap->total_size += new_bo_size;
+   heap->total_size += new_mem_size;
    return VK_SUCCESS;
 }
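
nvk_heap_grow_locked() sizes the first two backing allocations at NVK_HEAP_MIN_SIZE and doubles every one after that, so the heap reaches its maximum in a small, fixed number of steps. For contiguous heaps, each new memory is bound at offset heap->total_size into the VA range reserved at init time, so the heap stays one contiguous address range, and the previous memory's end padding can be returned to the allocator because the newly bound memory now provides it. A minimal sketch of that schedule; the 64 KiB MIN_SIZE is hypothetical (the real constant is NVK_HEAP_MIN_SIZE in nvk_heap.h):

/* Sketch of new_mem_size = MIN_SIZE << (MAX2(n, 1) - 1) and of where each
 * backing allocation lands in a contiguous heap.  MIN_SIZE is hypothetical. */
#include <inttypes.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   const uint64_t min_size = 64 * 1024;   /* hypothetical */
   uint64_t total = 0;

   for (uint32_t n = 0; n < 6; n++) {
      const uint64_t new_mem_size = min_size << (MAX2(n, 1) - 1);
      /* A contiguous heap binds the new memory at base + total, so existing
       * addresses never move as the heap grows. */
      printf("grow %" PRIu32 ": +%" PRIu64 " bytes at offset %" PRIu64 "\n",
             n, new_mem_size, total);
      total += new_mem_size;
   }
   /* Sizes run MIN, MIN, 2*MIN, 4*MIN, ... so the total after k grows is
    * MIN * 2^(k-1) for k >= 1. */
   return 0;
}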
@@ -162,17 +158,17 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
    while (1) {
       uint64_t vma = util_vma_heap_alloc(&heap->heap, size, alignment);
       if (vma != 0) {
-         uint32_t bo_idx = vma_bo_idx(vma);
-         uint64_t bo_offset = vma_bo_offset(vma);
+         uint32_t mem_idx = vma_mem_idx(vma);
+         uint64_t mem_offset = vma_mem_offset(vma);
-         assert(bo_idx < heap->bo_count);
-         assert(heap->bos[bo_idx].bo != NULL);
-         assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
+         assert(mem_idx < heap->mem_count);
+         assert(heap->mem[mem_idx].mem != NULL);
+         assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);
-         *addr_out = heap->bos[bo_idx].addr + bo_offset;
+         *addr_out = heap->mem[mem_idx].addr + mem_offset;
          if (map_out != NULL) {
-            if (heap->bos[bo_idx].map != NULL)
-               *map_out = (char *)heap->bos[bo_idx].map + bo_offset;
+            if (heap->mem[mem_idx].mem->map != NULL)
+               *map_out = (char *)heap->mem[mem_idx].mem->map + mem_offset;
             else
                *map_out = NULL;
          }
@@ -192,16 +188,16 @@ nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
 {
    assert(addr + size > addr);
-   for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
-      if (addr < heap->bos[bo_idx].addr)
+   for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++) {
+      if (addr < heap->mem[mem_idx].addr)
          continue;
-      uint64_t bo_offset = addr - heap->bos[bo_idx].addr;
-      if (bo_offset >= heap->bos[bo_idx].bo->size)
+      uint64_t mem_offset = addr - heap->mem[mem_idx].addr;
+      if (mem_offset >= heap->mem[mem_idx].mem->size_B)
         continue;
-      assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
-      uint64_t vma = encode_vma(bo_idx, bo_offset);
+      assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);
+      uint64_t vma = encode_vma(mem_idx, mem_offset);
       util_vma_heap_free(&heap->heap, vma, size);
       return;
@@ -236,7 +232,7 @@ nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
    if (result != VK_SUCCESS)
       return result;
-   if (map != NULL && (heap->map_flags & NOUVEAU_WS_BO_WR)) {
+   if (map != NULL && (heap->map_flags & NVKMD_MEM_MAP_WR)) {
       /* If we have a map, copy directly with memcpy */
       memcpy(map, data, size);
    } else {


@@ -7,9 +7,9 @@
 #include "nvk_private.h"
-#include "nouveau_bo.h"
 #include "util/simple_mtx.h"
 #include "util/vma.h"
+#include "nvkmd/nvkmd.h"
 struct nvk_device;
@@ -20,32 +20,31 @@ struct nvk_device;
 #define NVK_HEAP_MAX_BO_COUNT (NVK_HEAP_MAX_SIZE_LOG2 - \
                                NVK_HEAP_MIN_SIZE_LOG2 + 1)
-struct nvk_heap_bo {
-   struct nouveau_ws_bo *bo;
-   void *map;
+struct nvk_heap_mem {
+   struct nvkmd_mem *mem;
    uint64_t addr;
 };
 struct nvk_heap {
-   enum nouveau_ws_bo_flags bo_flags;
-   enum nouveau_ws_bo_map_flags map_flags;
+   enum nvkmd_mem_flags mem_flags;
+   enum nvkmd_mem_map_flags map_flags;
    uint32_t overalloc;
    simple_mtx_t mutex;
    struct util_vma_heap heap;
-   /* Base address for contiguous heaps, 0 otherwise */
-   uint64_t base_addr;
+   /* VA for contiguous heaps, NULL otherwise */
+   struct nvkmd_va *contig_va;
    uint64_t total_size;
-   uint32_t bo_count;
-   struct nvk_heap_bo bos[NVK_HEAP_MAX_BO_COUNT];
+   uint32_t mem_count;
+   struct nvk_heap_mem mem[NVK_HEAP_MAX_BO_COUNT];
 };
 VkResult nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
-                       enum nouveau_ws_bo_flags bo_flags,
-                       enum nouveau_ws_bo_map_flags map_flags,
+                       enum nvkmd_mem_flags mem_flags,
+                       enum nvkmd_mem_map_flags map_flags,
                        uint32_t overalloc, bool contiguous);
 void nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap);
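
The NVK_HEAP_MAX_BO_COUNT bound above follows from the doubling schedule: after k grows the heap holds MIN_SIZE * 2^(k-1) bytes, so growing from 2^NVK_HEAP_MIN_SIZE_LOG2 up to 2^NVK_HEAP_MAX_SIZE_LOG2 takes exactly MAX_SIZE_LOG2 - MIN_SIZE_LOG2 + 1 backing allocations. A small self-check of that arithmetic with hypothetical log2 values (the real ones are defined in nvk_heap.h):

/* Sketch: MAX_SIZE_LOG2 - MIN_SIZE_LOG2 + 1 doubling allocations reach the
 * maximum heap size exactly.  The log2 values below are hypothetical. */
#include <assert.h>
#include <stdint.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   const uint32_t min_log2 = 16, max_log2 = 26;   /* hypothetical */
   const uint64_t min_size = 1ull << min_log2;
   const uint64_t max_size = 1ull << max_log2;
   const uint32_t max_count = max_log2 - min_log2 + 1;

   uint64_t total = 0;
   for (uint32_t n = 0; n < max_count; n++)
      total += min_size << (MAX2(n, 1) - 1);

   assert(total == max_size);   /* min_size * 2^(max_count - 1) == max_size */
   return 0;
}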
@@ -64,8 +63,8 @@ void nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
 static inline uint64_t
 nvk_heap_contiguous_base_address(struct nvk_heap *heap)
 {
-   assert(heap->base_addr != 0);
-   return heap->base_addr;
+   assert(heap->contig_va != NULL);
+   return heap->contig_va->addr;
 }
 #endif /* define NVK_HEAP_H */


@@ -11,6 +11,7 @@
 #include <xf86drm.h>
 #include "nouveau_context.h"
 #include "nouveau_device.h"
+#include "drm-uapi/nouveau_drm.h"
 #include "nv_push.h"