nvk: Use an nvk_mem_arena for nvk_descriptor_table

This gives descriptors a consistent base address and lets us stop doing the memory reference-counting dance for queue state management. We still need to track sizes so that the GPU doesn't read outside the arena, but that's a lot simpler.

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35327>
2026-05-05 22:38:05 +02:00 · 2025-06-03 15:42:45 -04:00 · 2025-06-03 15:42:45 -04:00 · 6b2331d5f7
commit 6b2331d5f7
parent ec148e9cdc
4 changed files with 54 additions and 79 deletions
--- a/src/nouveau/vulkan/nvk_descriptor_table.c
+++ b/src/nouveau/vulkan/nvk_descriptor_table.c
@ -14,27 +14,16 @@ nvk_descriptor_table_grow_locked(struct nvk_device *dev,
                                 struct nvk_descriptor_table *table,
                                 uint32_t new_alloc)
 {
-   struct nvkmd_mem *new_mem;
   BITSET_WORD *new_in_use;
   uint32_t *new_free_table;
-   VkResult result;

-   assert(new_alloc > table->alloc && new_alloc <= table->max_alloc);
-
-   const uint32_t new_mem_size = new_alloc * table->desc_size;
-   result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
-                                       new_mem_size, 256,
-                                       NVKMD_MEM_LOCAL, NVKMD_MEM_MAP_WR,
-                                       &new_mem);
-   if (result != VK_SUCCESS)
-      return result;
-
-   if (table->mem) {
-      assert(new_mem_size >= table->mem->size_B);
-      memcpy(new_mem->map, table->mem->map, table->mem->size_B);
-      nvkmd_mem_unref(table->mem);
+   uint32_t new_arena_size_B = new_alloc * table->desc_size;
+   while (nvk_mem_arena_size_B(&table->arena) < new_arena_size_B) {
+      VkResult result = nvk_mem_arena_grow_locked(dev, &table->arena,
+                                                  NULL, NULL);
+      if (result != VK_SUCCESS)
+         return result;
   }
-   table->mem = new_mem;

   assert((table->alloc % BITSET_WORDBITS) == 0);
   assert((new_alloc % BITSET_WORDBITS) == 0);
@ -78,11 +67,15 @@ nvk_descriptor_table_init(struct nvk_device *dev,
   memset(table, 0, sizeof(*table));
   VkResult result;

-   simple_mtx_init(&table->mutex, mtx_plain);
-
   assert(util_is_power_of_two_nonzero(min_descriptor_count));
   assert(util_is_power_of_two_nonzero(max_descriptor_count));

+   result = nvk_mem_arena_init(dev, &table->arena, NVKMD_MEM_LOCAL,
+                               NVKMD_MEM_MAP_WR, true /* contiguous */,
+                               max_descriptor_count * descriptor_size);
+   if (result != VK_SUCCESS)
+      return result;
+
   table->desc_size = descriptor_size;
   table->alloc = 0;
   table->max_alloc = max_descriptor_count;
@ -102,11 +95,9 @@ void
 nvk_descriptor_table_finish(struct nvk_device *dev,
                            struct nvk_descriptor_table *table)
 {
-   if (table->mem != NULL)
-      nvkmd_mem_unref(table->mem);
+   nvk_mem_arena_finish(dev, &table->arena);
   vk_free(&dev->vk.alloc, table->in_use);
   vk_free(&dev->vk.alloc, table->free_table);
-   simple_mtx_destroy(&table->mutex);
 }

 static void *
@ -116,7 +107,9 @@ nvk_descriptor_table_map_locked(struct nvk_descriptor_table *table,
   assert(index < table->alloc);
   assert(BITSET_TEST(table->in_use, index));

-   return (char *)table->mem->map + (index * table->desc_size);
+   uint32_t offset_B = index * table->desc_size;
+   return nvk_contiguous_mem_arena_map_offset(&table->arena, offset_B,
+                                              table->desc_size);
 }

 static void
@ -219,10 +212,10 @@ nvk_descriptor_table_add(struct nvk_device *dev,
                         const void *desc_data, size_t desc_size,
                         uint32_t *index_out)
 {
-   simple_mtx_lock(&table->mutex);
+   simple_mtx_lock(&table->arena.mutex);
   VkResult result = nvk_descriptor_table_add_locked(dev, table, desc_data,
                                                     desc_size, index_out);
-   simple_mtx_unlock(&table->mutex);
+   simple_mtx_unlock(&table->arena.mutex);

   return result;
 }
@ -248,10 +241,10 @@ nvk_descriptor_table_insert(struct nvk_device *dev,
                            uint32_t index,
                            const void *desc_data, size_t desc_size)
 {
-   simple_mtx_lock(&table->mutex);
+   simple_mtx_lock(&table->arena.mutex);
   VkResult result = nvk_descriptor_table_insert_locked(dev, table, index,
                                                        desc_data, desc_size);
-   simple_mtx_unlock(&table->mutex);
+   simple_mtx_unlock(&table->arena.mutex);

   return result;
 }
@ -314,7 +307,7 @@ nvk_descriptor_table_remove(struct nvk_device *dev,
                            struct nvk_descriptor_table *table,
                            uint32_t index)
 {
-   simple_mtx_lock(&table->mutex);
+   simple_mtx_lock(&table->arena.mutex);
   nvk_descriptor_table_remove_locked(dev, table, index);
-   simple_mtx_unlock(&table->mutex);
+   simple_mtx_unlock(&table->arena.mutex);
 }
--- a/src/nouveau/vulkan/nvk_descriptor_table.h
+++ b/src/nouveau/vulkan/nvk_descriptor_table.h
@ -5,16 +5,14 @@
 #ifndef NVK_DESCRIPTOR_TABLE_H
 #define NVK_DESCRIPTOR_TABLE_H 1

-#include "nvk_private.h"
+#include "nvk_mem_arena.h"

 #include "util/bitset.h"
-#include "util/simple_mtx.h"
-#include "nvkmd/nvkmd.h"

 struct nvk_device;

 struct nvk_descriptor_table {
-   simple_mtx_t mutex;
+   struct nvk_mem_arena arena;

   uint32_t desc_size; /**< Size of a descriptor */
   uint32_t alloc; /**< Number of descriptors allocated */
@ -22,8 +20,6 @@ struct nvk_descriptor_table {
   uint32_t next_desc; /**< Next unallocated descriptor */
   uint32_t free_count; /**< Size of free_table */

-   struct nvkmd_mem *mem;
-
   /* Bitset of all descriptors currently in use.  This is the single source
    * of truth for what is and isn't free.  The free_table and next_desc are
    * simply hints to make finding a free descrptor fast.  Every free
@ -59,18 +55,20 @@ void nvk_descriptor_table_remove(struct nvk_device *dev,
                                 struct nvk_descriptor_table *table,
                                 uint32_t index);

-static inline struct nvkmd_mem *
-nvk_descriptor_table_get_mem_ref(struct nvk_descriptor_table *table,
-                                 uint32_t *alloc_count_out)
+static inline uint64_t
+nvk_descriptor_table_base_address(struct nvk_descriptor_table *table)
 {
-   simple_mtx_lock(&table->mutex);
-   struct nvkmd_mem *mem = table->mem;
-   if (mem)
-      nvkmd_mem_ref(mem);
-   *alloc_count_out = table->alloc;
-   simple_mtx_unlock(&table->mutex);
+   return nvk_contiguous_mem_arena_base_address(&table->arena);
+}

-   return mem;
+static inline uint64_t
+nvk_descriptor_table_alloc_count(struct nvk_descriptor_table *table)
+{
+   simple_mtx_lock(&table->arena.mutex);
+   uint32_t alloc = table->alloc;
+   simple_mtx_unlock(&table->arena.mutex);
+
+   return alloc;
 }

 #endif
--- a/src/nouveau/vulkan/nvk_queue.c
+++ b/src/nouveau/vulkan/nvk_queue.c
@ -33,10 +33,6 @@ static void
 nvk_queue_state_finish(struct nvk_device *dev,
                       struct nvk_queue_state *qs)
 {
-   if (qs->images.mem)
-      nvkmd_mem_unref(qs->images.mem);
-   if (qs->samplers.mem)
-      nvkmd_mem_unref(qs->samplers.mem);
   if (qs->slm.mem)
      nvkmd_mem_unref(qs->slm.mem);
 }
@ -51,30 +47,16 @@ nvk_queue_state_update(struct nvk_queue *queue,
   uint32_t alloc_count, bytes_per_warp, bytes_per_tpc;
   bool dirty = false;

-   mem = nvk_descriptor_table_get_mem_ref(&dev->images, &alloc_count);
-   if (qs->images.mem != mem || qs->images.alloc_count != alloc_count) {
-      if (qs->images.mem)
-         nvkmd_mem_unref(qs->images.mem);
-      qs->images.mem = mem;
+   alloc_count = nvk_descriptor_table_alloc_count(&dev->images);
+   if (qs->images.alloc_count != alloc_count) {
      qs->images.alloc_count = alloc_count;
      dirty = true;
-   } else {
-      /* No change */
-      if (mem)
-         nvkmd_mem_unref(mem);
   }

-   mem = nvk_descriptor_table_get_mem_ref(&dev->samplers, &alloc_count);
-   if (qs->samplers.mem != mem || qs->samplers.alloc_count != alloc_count) {
-      if (qs->samplers.mem)
-         nvkmd_mem_unref(qs->samplers.mem);
-      qs->samplers.mem = mem;
+   alloc_count = nvk_descriptor_table_alloc_count(&dev->samplers);
+   if (qs->samplers.alloc_count != alloc_count) {
      qs->samplers.alloc_count = alloc_count;
      dirty = true;
-   } else {
-      /* No change */
-      if (mem)
-         nvkmd_mem_unref(mem);
   }

   mem = nvk_slm_area_get_mem_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
@ -100,11 +82,13 @@ nvk_queue_state_update(struct nvk_queue *queue,
   nv_push_init(&push, push_data, 64);
   struct nv_push *p = &push;

-   if (qs->images.mem) {
+   if (qs->images.alloc_count > 0) {
+      const uint64_t tex_pool_addr =
+         nvk_descriptor_table_base_address(&dev->images);
      if (queue->engines & NVKMD_ENGINE_COMPUTE) {
         P_MTHD(p, NVA0C0, SET_TEX_HEADER_POOL_A);
-         P_NVA0C0_SET_TEX_HEADER_POOL_A(p, qs->images.mem->va->addr >> 32);
-         P_NVA0C0_SET_TEX_HEADER_POOL_B(p, qs->images.mem->va->addr);
+         P_NVA0C0_SET_TEX_HEADER_POOL_A(p, tex_pool_addr >> 32);
+         P_NVA0C0_SET_TEX_HEADER_POOL_B(p, tex_pool_addr);
         P_NVA0C0_SET_TEX_HEADER_POOL_C(p, qs->images.alloc_count - 1);
         P_IMMD(p, NVA0C0, INVALIDATE_TEXTURE_HEADER_CACHE_NO_WFI, {
            .lines = LINES_ALL
@ -113,8 +97,8 @@ nvk_queue_state_update(struct nvk_queue *queue,

      if (queue->engines & NVKMD_ENGINE_3D) {
         P_MTHD(p, NV9097, SET_TEX_HEADER_POOL_A);
-         P_NV9097_SET_TEX_HEADER_POOL_A(p, qs->images.mem->va->addr >> 32);
-         P_NV9097_SET_TEX_HEADER_POOL_B(p, qs->images.mem->va->addr);
+         P_NV9097_SET_TEX_HEADER_POOL_A(p, tex_pool_addr >> 32);
+         P_NV9097_SET_TEX_HEADER_POOL_B(p, tex_pool_addr);
         P_NV9097_SET_TEX_HEADER_POOL_C(p, qs->images.alloc_count - 1);
         P_IMMD(p, NV9097, INVALIDATE_TEXTURE_HEADER_CACHE_NO_WFI, {
            .lines = LINES_ALL
@ -122,11 +106,13 @@ nvk_queue_state_update(struct nvk_queue *queue,
      }
   }

-   if (qs->samplers.mem) {
+   if (qs->samplers.alloc_count > 0) {
+      const uint64_t sampler_pool_addr =
+         nvk_descriptor_table_base_address(&dev->samplers);
      if (queue->engines & NVKMD_ENGINE_COMPUTE) {
         P_MTHD(p, NVA0C0, SET_TEX_SAMPLER_POOL_A);
-         P_NVA0C0_SET_TEX_SAMPLER_POOL_A(p, qs->samplers.mem->va->addr >> 32);
-         P_NVA0C0_SET_TEX_SAMPLER_POOL_B(p, qs->samplers.mem->va->addr);
+         P_NVA0C0_SET_TEX_SAMPLER_POOL_A(p, sampler_pool_addr >> 32);
+         P_NVA0C0_SET_TEX_SAMPLER_POOL_B(p, sampler_pool_addr);
         P_NVA0C0_SET_TEX_SAMPLER_POOL_C(p, qs->samplers.alloc_count - 1);
         P_IMMD(p, NVA0C0, INVALIDATE_SAMPLER_CACHE_NO_WFI, {
            .lines = LINES_ALL
@ -135,8 +121,8 @@ nvk_queue_state_update(struct nvk_queue *queue,

      if (queue->engines & NVKMD_ENGINE_3D) {
         P_MTHD(p, NV9097, SET_TEX_SAMPLER_POOL_A);
-         P_NV9097_SET_TEX_SAMPLER_POOL_A(p, qs->samplers.mem->va->addr >> 32);
-         P_NV9097_SET_TEX_SAMPLER_POOL_B(p, qs->samplers.mem->va->addr);
+         P_NV9097_SET_TEX_SAMPLER_POOL_A(p, sampler_pool_addr >> 32);
+         P_NV9097_SET_TEX_SAMPLER_POOL_B(p, sampler_pool_addr);
         P_NV9097_SET_TEX_SAMPLER_POOL_C(p, qs->samplers.alloc_count - 1);
         P_IMMD(p, NV9097, INVALIDATE_SAMPLER_CACHE_NO_WFI, {
            .lines = LINES_ALL
--- a/src/nouveau/vulkan/nvk_queue.h
+++ b/src/nouveau/vulkan/nvk_queue.h
@ -20,12 +20,10 @@ struct nvkmd_ctx;

 struct nvk_queue_state {
   struct {
-      struct nvkmd_mem *mem;
      uint32_t alloc_count;
   } images;

   struct {
-      struct nvkmd_mem *mem;
      uint32_t alloc_count;
   } samplers;