nvk: Add support for contiguous heaps to nvk_heap

This is required for shader heaps pre-Volta because they use a single
64-bit base address and 32-bit offsets for individual shaders.  In this
case, the addresses returned from the heap are relative to the base.
The BO for the heap can be retrieved via nvk_heap_get_contiguous_bo_ref
which returns an actual reference so it's safe to call from the queue
submit path.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
commit 55be10648d (parent dc8fd9050c)
Faith Ekstrand, 2023-01-30 20:12:06 -06:00, committed by Marge Bot
3 changed files with 141 additions and 30 deletions
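For context, a minimal sketch of the pre-Volta addressing model the commit message describes. All names here are invented for illustration and are not part of this change:

/* Illustration only (invented names): pre-Volta hardware resolves a
 * shader from one 64-bit base plus a 32-bit per-shader offset, which
 * is why all shaders must live in a single contiguous BO. */
static uint64_t
resolve_shader_va(uint64_t heap_base, uint32_t shader_offset)
{
   return heap_base + shader_offset;
}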

src/nouveau/vulkan/nvk_device.c

@@ -180,13 +180,13 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
     */
    result = nvk_heap_init(device, &device->shader_heap,
                           NOUVEAU_WS_BO_LOCAL, NOUVEAU_WS_BO_WR,
-                          4096 /* overalloc */);
+                          4096 /* overalloc */, false /* contiguous */);
    if (result != VK_SUCCESS)
       goto fail_samplers;
 
    result = nvk_heap_init(device, &device->event_heap,
                           NOUVEAU_WS_BO_LOCAL, NOUVEAU_WS_BO_WR,
-                          0 /* overalloc */);
+                          0 /* overalloc */, false /* contiguous */);
    if (result != VK_SUCCESS)
       goto fail_shader_heap;

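Note that both heaps stay non-contiguous in this commit; a pre-Volta device would presumably flip the shader heap to contiguous. A hedged sketch of what that call might look like (the cls_eng3d < VOLTA_A condition is an assumption, not part of this diff):

   /* Hypothetical pre-Volta variant: request a contiguous shader heap
    * only when the 3D class predates Volta (condition assumed). */
   result = nvk_heap_init(device, &device->shader_heap,
                          NOUVEAU_WS_BO_LOCAL, NOUVEAU_WS_BO_WR,
                          4096 /* overalloc */,
                          device->pdev->info.cls_eng3d < VOLTA_A /* contiguous */);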
src/nouveau/vulkan/nvk_heap.c

@@ -13,7 +13,7 @@ VkResult
 nvk_heap_init(struct nvk_device *device, struct nvk_heap *heap,
               enum nouveau_ws_bo_flags bo_flags,
               enum nouveau_ws_bo_map_flags map_flags,
-              uint32_t overalloc)
+              uint32_t overalloc, bool contiguous)
 {
    memset(heap, 0, sizeof(*heap));
 
@@ -22,6 +22,7 @@ nvk_heap_init(struct nvk_device *device, struct nvk_heap *heap,
       heap->bo_flags |= NOUVEAU_WS_BO_MAP;
    heap->map_flags = map_flags;
    heap->overalloc = overalloc;
+   heap->contiguous = contiguous;
 
    simple_mtx_init(&heap->mutex, mtx_plain);
    util_vma_heap_init(&heap->heap, 0, 0);
@@ -69,32 +70,115 @@ vma_bo_offset(uint64_t offset)
 static VkResult
 nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
 {
-   if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
-      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                       "Heap has already hit its maximum size");
-   }
-
-   /* First two BOs are MIN_SIZE, double after that */
-   const uint64_t new_bo_size =
-      NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
-
-   heap->bos[heap->bo_count].bo =
-      nouveau_ws_bo_new_mapped(dev->pdev->dev,
-                               new_bo_size + heap->overalloc, 0,
-                               heap->bo_flags, heap->map_flags,
-                               &heap->bos[heap->bo_count].map);
-   if (heap->bos[heap->bo_count].bo == NULL) {
-      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                       "Failed to allocate a heap BO: %m");
-   }
-
-   uint64_t vma = encode_vma(heap->bo_count, 0);
-   util_vma_heap_free(&heap->heap, vma, new_bo_size);
-
-   heap->total_size += new_bo_size;
-   heap->bo_count++;
-
-   return VK_SUCCESS;
+   VkResult result;
+
+   if (heap->contiguous) {
+      if (heap->total_size >= NVK_HEAP_MAX_SIZE) {
+         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "Heap has already hit its maximum size");
+      }
+
+      const uint64_t new_bo_size =
+         MAX2(heap->total_size * 2, NVK_HEAP_MIN_SIZE);
+
+      void *new_bo_map;
+      struct nouveau_ws_bo *new_bo =
+         nouveau_ws_bo_new_mapped(dev->pdev->dev,
+                                  new_bo_size + heap->overalloc, 0,
+                                  heap->bo_flags, heap->map_flags,
+                                  &new_bo_map);
+      if (new_bo == NULL) {
+         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "Failed to allocate a heap BO: %m");
+      }
+
+      if (heap->bo_count > 0) {
+         assert(heap->bo_count == 1);
+         struct nouveau_ws_bo *old_bo = heap->bos[0].bo;
+
+         uint32_t push_dw[10];
+         struct nv_push push;
+         nv_push_init(&push, push_dw, ARRAY_SIZE(push_dw));
+         struct nv_push *p = &push;
+
+         P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
+         P_NV90B5_OFFSET_IN_UPPER(p, old_bo->offset >> 32);
+         P_NV90B5_OFFSET_IN_LOWER(p, old_bo->offset & 0xffffffff);
+         P_NV90B5_OFFSET_OUT_UPPER(p, new_bo->offset >> 32);
+         P_NV90B5_OFFSET_OUT_LOWER(p, new_bo->offset & 0xffffffff);
+
+         assert(util_is_power_of_two_nonzero(heap->total_size));
+         assert(heap->total_size >= NVK_HEAP_MIN_SIZE);
+         assert(heap->total_size <= old_bo->size);
+         assert(heap->total_size < new_bo_size);
+
+         unsigned line_bytes = MIN2(heap->total_size, 1 << 17);
+         assert(heap->total_size % line_bytes == 0);
+
+         P_MTHD(p, NV90B5, LINE_LENGTH_IN);
+         P_NV90B5_LINE_LENGTH_IN(p, line_bytes);
+         P_NV90B5_LINE_COUNT(p, heap->total_size / line_bytes);
+
+         P_IMMD(p, NV90B5, LAUNCH_DMA, {
+            .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
+            .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
+            .flush_enable = FLUSH_ENABLE_TRUE,
+            .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
+            .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
+         });
+
+         struct nouveau_ws_bo *push_bos[] = { new_bo, old_bo, };
+
+         result = nvk_queue_submit_simple(&dev->queue,
+                                          nv_push_dw_count(&push), push_dw,
+                                          ARRAY_SIZE(push_bos), push_bos,
+                                          true /* sync */);
+         if (result != VK_SUCCESS) {
+            nouveau_ws_bo_unmap(new_bo, new_bo_map);
+            nouveau_ws_bo_destroy(new_bo);
+            return result;
+         }
+
+         nouveau_ws_bo_unmap(heap->bos[0].bo, heap->bos[0].map);
+         nouveau_ws_bo_destroy(heap->bos[0].bo);
+      }
+
+      uint64_t vma = encode_vma(0, heap->total_size);
+      util_vma_heap_free(&heap->heap, vma, new_bo_size - heap->total_size);
+
+      heap->total_size = new_bo_size;
+      heap->bo_count = 1;
+      heap->bos[0].bo = new_bo;
+      heap->bos[0].map = new_bo_map;
+
+      return VK_SUCCESS;
+   } else {
+      if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
+         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "Heap has already hit its maximum size");
+      }
+
+      /* First two BOs are MIN_SIZE, double after that */
+      const uint64_t new_bo_size =
+         NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
+
+      heap->bos[heap->bo_count].bo =
+         nouveau_ws_bo_new_mapped(dev->pdev->dev,
+                                  new_bo_size + heap->overalloc, 0,
+                                  heap->bo_flags, heap->map_flags,
+                                  &heap->bos[heap->bo_count].map);
+      if (heap->bos[heap->bo_count].bo == NULL) {
+         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "Failed to allocate a heap BO: %m");
+      }
+
+      uint64_t vma = encode_vma(heap->bo_count, 0);
+      util_vma_heap_free(&heap->heap, vma, new_bo_size);
+
+      heap->total_size += new_bo_size;
+      heap->bo_count++;
+
+      return VK_SUCCESS;
+   }
 }
 
 static VkResult
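The grow paths above lean on encode_vma() and vma_bo_offset(), which this hunk doesn't show. A plausible sketch of that packing, assuming a 48-bit offset field (the real widths in nvk_heap.c may differ). It also shows why the contiguous path frees only the delta at encode_vma(0, heap->total_size): old offsets stay valid after the DMA copy into the new BO.

/* Plausible sketch of the VMA packing (widths are an assumption).
 * Non-contiguous heaps give each BO its own offset space by packing
 * the BO index into the high bits; contiguous heaps always use
 * bo_idx == 0, so a heap address is just an offset into the one BO. */
static uint64_t
encode_vma(uint32_t bo_idx, uint64_t bo_offset)
{
   assert(bo_offset < (1ull << 48));
   return ((uint64_t)(bo_idx + 1) << 48) | bo_offset;
}

static uint64_t
vma_bo_offset(uint64_t offset)
{
   return offset & ((1ull << 48) - 1);
}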
@@ -113,7 +197,12 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
    assert(bo_offset + size + heap->overalloc <=
           heap->bos[bo_idx].bo->size);
 
-   *addr_out = heap->bos[bo_idx].bo->offset + bo_offset;
+   if (heap->contiguous) {
+      assert(bo_idx == 0);
+      *addr_out = bo_offset;
+   } else {
+      *addr_out = heap->bos[bo_idx].bo->offset + bo_offset;
+   }
    *map_out = (char *)heap->bos[bo_idx].map + bo_offset;
 
    return VK_SUCCESS;
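Since a contiguous heap hands back bo_offset directly, a consumer that needs an absolute GPU VA adds the base of the single BO itself. A hypothetical helper, only to show the arithmetic; doing this without holding a reference would be racy, which is what nvk_heap_get_contiguous_bo_ref below is for:

/* Hypothetical helper: turn a contiguous-heap address back into an
 * absolute GPU VA, mirroring the hardware's base + 32-bit offset math. */
static uint64_t
contiguous_addr_to_va(struct nvk_heap *heap, uint64_t addr)
{
   assert(heap->contiguous && heap->bo_count == 1);
   return heap->bos[0].bo->offset + addr;
}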
@@ -153,6 +242,12 @@ nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
               uint64_t size, uint32_t alignment,
               uint64_t *addr_out, void **map_out)
 {
+   /* We can't return maps from contiguous heaps because the map may go
+    * away at any time when the lock isn't taken and we don't want to trust
+    * the caller with racy maps.
+    */
+   assert(!heap->contiguous);
+
    simple_mtx_lock(&heap->mutex);
    VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                            addr_out, map_out);
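With nvk_heap_alloc off limits, contiguous heaps are fed through nvk_heap_upload, which copies the data under the heap lock and returns only an address. A sketch of a call site; the parameter list is inferred from the declaration in nvk_heap.h and may not match exactly:

/* Hypothetical upload of shader code into a contiguous heap; for such
 * a heap, the returned address is an offset from the heap's base BO,
 * not a full 64-bit VA. */
uint64_t shader_addr;
result = nvk_heap_upload(dev, &dev->shader_heap,
                         shader_code, shader_code_size,
                         0x80 /* alignment */, &shader_addr);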

src/nouveau/vulkan/nvk_heap.h

@@ -25,6 +25,7 @@ struct nvk_heap {
    enum nouveau_ws_bo_flags bo_flags;
    enum nouveau_ws_bo_map_flags map_flags;
    uint32_t overalloc;
+   bool contiguous;
 
    simple_mtx_t mutex;
    struct util_vma_heap heap;
@@ -38,7 +39,7 @@ struct nvk_heap {
 VkResult nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
                        enum nouveau_ws_bo_flags bo_flags,
                        enum nouveau_ws_bo_map_flags map_flags,
-                       uint32_t overalloc);
+                       uint32_t overalloc, bool contiguous);
 
 void nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap);
@@ -53,4 +54,19 @@ VkResult nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
 void nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
                    uint64_t addr, uint64_t size);
 
+static inline struct nouveau_ws_bo *
+nvk_heap_get_contiguous_bo_ref(struct nvk_heap *heap)
+{
+   assert(heap->contiguous);
+   assert(heap->bo_count <= 1);
+
+   simple_mtx_lock(&heap->mutex);
+   struct nouveau_ws_bo *bo = heap->bos[0].bo;
+   if (bo)
+      nouveau_ws_bo_ref(bo);
+   simple_mtx_unlock(&heap->mutex);
+
+   return bo;
+}
+
 #endif /* define NVK_HEAP_H */
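Finally, a hedged sketch of the queue-submit usage the commit message promises. The SET_PROGRAM_REGION methods and the queue_state bookkeeping are assumptions about the pre-Volta consumer, not part of this commit; the point is that the reference from nvk_heap_get_contiguous_bo_ref is held for as long as a submit may use the BO:

/* Hypothetical pre-Volta submit path (method and field names assumed). */
struct nouveau_ws_bo *bo =
   nvk_heap_get_contiguous_bo_ref(&dev->shader_heap);
if (bo != NULL) {
   /* Program the 64-bit shader base; individual shaders then dispatch
    * via their 32-bit heap offsets. */
   P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
   P_NV9097_SET_PROGRAM_REGION_A(p, bo->offset >> 32);
   P_NV9097_SET_PROGRAM_REGION_B(p, bo->offset & 0xffffffff);
}

/* Swap the reference held in the queue state; assuming, as elsewhere
 * in this diff, that nouveau_ws_bo_destroy() drops a reference on the
 * refcounted winsys BO. */
if (queue_state->shader_heap_bo != NULL)
   nouveau_ws_bo_destroy(queue_state->shader_heap_bo);
queue_state->shader_heap_bo = bo;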