venus: switch to lazy VkBuffer cache

Instead of creating a static VkBuffer cache at init with hardcoded CreateInfo's, lazily cache VkBuffers that the app requests. Signed-off-by: Juston Li <justonli@google.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21324>
2026-02-26 15:40:30 +01:00 · 2023-02-08 17:01:31 -08:00 · 2023-02-08 17:01:31 -08:00 · ef255444c1
commit ef255444c1
parent 56a9aad401
2 changed files with 85 additions and 177 deletions
--- a/src/virtio/vulkan/vn_buffer.c
+++ b/src/virtio/vulkan/vn_buffer.c
@ -20,142 +20,16 @@

 /* buffer commands */

-/* mandatory buffer create infos to cache */
-static const VkBufferCreateInfo cache_infos[] = {
-   {
-      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-      .size = 1,
-      .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
-      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-   },
-   {
-      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-      .size = 1,
-      .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-   },
-   {
-      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-      .size = 1,
-      .usage =
-         VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
-      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-   },
-   {
-      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-      .size = 1,
-      .usage =
-         VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
-      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-   },
-   {
-      /* mainly for layering clients like angle and zink */
-      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-      .size = 1,
-      .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
-               VK_BUFFER_USAGE_TRANSFER_DST_BIT |
-               VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
-               VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
-               VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
-               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
-               VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
-               VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
-               VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
-               VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
-               VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
-               VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT |
-               VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
-      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-   },
-};
-
 static inline bool
-vn_buffer_create_info_can_be_cached(const VkBufferCreateInfo *create_info)
+vn_buffer_create_info_can_be_cached(const VkBufferCreateInfo *create_info,
+                                    struct vn_buffer_cache *cache)
 {
   /* cache only VK_SHARING_MODE_EXCLUSIVE and without pNext for simplicity */
-   return (create_info->pNext == NULL) &&
+   return (create_info->size <= cache->max_buffer_size) &&
+          (create_info->pNext == NULL) &&
          (create_info->sharingMode == VK_SHARING_MODE_EXCLUSIVE);
 }

-static VkResult
-vn_buffer_cache_entries_create(struct vn_device *dev,
-                               struct vn_buffer_cache_entry **out_entries,
-                               uint32_t *out_entry_count)
-{
-   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
-   const struct vk_device_extension_table *app_exts =
-      &dev->base.base.enabled_extensions;
-   VkDevice dev_handle = vn_device_to_handle(dev);
-   struct vn_buffer_cache_entry *entries;
-   const uint32_t entry_count = ARRAY_SIZE(cache_infos);
-   VkResult result;
-
-   entries = vk_zalloc(alloc, sizeof(*entries) * entry_count,
-                       VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
-   if (!entries)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-   for (uint32_t i = 0; i < entry_count; i++) {
-      VkBuffer buf_handle = VK_NULL_HANDLE;
-      struct vn_buffer *buf = NULL;
-      VkBufferCreateInfo local_info = cache_infos[i];
-
-      assert(vn_buffer_create_info_can_be_cached(&cache_infos[i]));
-
-      /* We mask out usage bits from exts not enabled by the app to create the
-       * buffer. To be noted, we'll still set cache entry create_info to the
-       * unmasked one for code simplicity, and it's fine to use a superset.
-       */
-      if (!app_exts->EXT_transform_feedback) {
-         local_info.usage &=
-            ~(VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
-              VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT);
-      }
-      if (!app_exts->EXT_conditional_rendering)
-         local_info.usage &= ~VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
-      /* TODO check feature enablement instead */
-      if (!app_exts->KHR_buffer_device_address)
-         local_info.usage &= ~VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
-
-      result = vn_CreateBuffer(dev_handle, &local_info, alloc, &buf_handle);
-      if (result != VK_SUCCESS) {
-         vk_free(alloc, entries);
-         return result;
-      }
-
-      buf = vn_buffer_from_handle(buf_handle);
-
-      /* TODO remove below after VK_KHR_maintenance4 becomes a requirement */
-      if (buf->requirements.memory.memoryRequirements.alignment <
-          buf->requirements.memory.memoryRequirements.size) {
-         vk_free(alloc, entries);
-         *out_entries = NULL;
-         *out_entry_count = 0;
-         return VK_SUCCESS;
-      }
-
-      entries[i].create_info = &cache_infos[i];
-      entries[i].requirements.memory = buf->requirements.memory;
-      entries[i].requirements.dedicated = buf->requirements.dedicated;
-
-      vn_DestroyBuffer(dev_handle, buf_handle, alloc);
-   }
-
-   *out_entries = entries;
-   *out_entry_count = entry_count;
-   return VK_SUCCESS;
-}
-
-static void
-vn_buffer_cache_entries_destroy(struct vn_device *dev,
-                                struct vn_buffer_cache_entry *entries)
-{
-   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
-
-   if (entries)
-      vk_free(alloc, entries);
-}
-
 static VkResult
 vn_buffer_get_max_buffer_size(struct vn_device *dev,
                              uint64_t *out_max_buffer_size)
@ -205,8 +79,6 @@ vn_buffer_cache_init(struct vn_device *dev)
 {
   uint32_t ahb_mem_type_bits = 0;
   uint64_t max_buffer_size = 0;
-   struct vn_buffer_cache_entry *entries = NULL;
-   uint32_t entry_count = 0;
   VkResult result;

   if (dev->base.base.enabled_extensions
@ -221,39 +93,33 @@ vn_buffer_cache_init(struct vn_device *dev)
      result = vn_buffer_get_max_buffer_size(dev, &max_buffer_size);
      if (result != VK_SUCCESS)
         return result;
-
-      result = vn_buffer_cache_entries_create(dev, &entries, &entry_count);
-      if (result != VK_SUCCESS)
-         return result;
   }

   dev->buffer_cache.ahb_mem_type_bits = ahb_mem_type_bits;
   dev->buffer_cache.max_buffer_size = max_buffer_size;
-   dev->buffer_cache.entries = entries;
-   dev->buffer_cache.entry_count = entry_count;
+
+   simple_mtx_init(&dev->buffer_cache.mutex, mtx_plain);
+   util_sparse_array_init(&dev->buffer_cache.entries,
+                          sizeof(struct vn_buffer_cache_entry), 64);
+
   return VK_SUCCESS;
 }

 void
 vn_buffer_cache_fini(struct vn_device *dev)
 {
-   vn_buffer_cache_entries_destroy(dev, dev->buffer_cache.entries);
+   util_sparse_array_finish(&dev->buffer_cache.entries);
+   simple_mtx_destroy(&dev->buffer_cache.mutex);
 }

-static bool
-vn_buffer_cache_get_memory_requirements(
+static struct vn_buffer_cache_entry *
+vn_buffer_get_cached_memory_requirements(
   struct vn_buffer_cache *cache,
   const VkBufferCreateInfo *create_info,
   struct vn_buffer_memory_requirements *out)
 {
   if (VN_PERF(NO_ASYNC_BUFFER_CREATE))
-      return false;
-
-   if (create_info->size > cache->max_buffer_size)
-      return false;
-
-   if (!vn_buffer_create_info_can_be_cached(create_info))
-      return false;
+      return NULL;

   /* 12.7. Resource Memory Association
    *
@ -261,35 +127,57 @@ vn_buffer_cache_get_memory_requirements(
    * with the same value for the flags and usage members in the
    * VkBufferCreateInfo structure and the handleTypes member of the
    * VkExternalMemoryBufferCreateInfo structure passed to vkCreateBuffer.
-    * Further, if usage1 and usage2 of type VkBufferUsageFlags are such that
-    * the bits set in usage2 are a subset of the bits set in usage1, and they
-    * have the same flags and VkExternalMemoryBufferCreateInfo::handleTypes,
-    * then the bits set in memoryTypeBits returned for usage1 must be a subset
-    * of the bits set in memoryTypeBits returned for usage2, for all values of
-    * flags.
    */
-   for (uint32_t i = 0; i < cache->entry_count; i++) {
-      const struct vn_buffer_cache_entry *entry = &cache->entries[i];
-      // TODO: Fix the spec regarding the usage and alignment behavior
-      if ((entry->create_info->flags == create_info->flags) &&
-          ((entry->create_info->usage & create_info->usage) ==
-           create_info->usage)) {
+   if (vn_buffer_create_info_can_be_cached(create_info, cache)) {
+      /* Combine flags and usage bits to form a unique index. */
+      const uint64_t idx =
+         (uint64_t)create_info->flags << 32 | create_info->usage;
+
+      struct vn_buffer_cache_entry *entry =
+         util_sparse_array_get(&cache->entries, idx);
+
+      if (entry->valid) {
         *out = entry->requirements;

-         /* TODO remove the comment after VK_KHR_maintenance4 becomes a
-          * requirement
+         /* TODO remove comment after mandating VK_KHR_maintenance4
          *
          * This is based on below implementation defined behavior:
-          *
          *    req.size <= align64(info.size, req.alignment)
          */
         out->memory.memoryRequirements.size = align64(
            create_info->size, out->memory.memoryRequirements.alignment);
-         return true;
      }
+
+      return entry;
   }

-   return false;
+   return NULL;
+}
+
+static void
+vn_buffer_cache_entry_init(struct vn_buffer_cache *cache,
+                           struct vn_buffer_cache_entry *entry,
+                           VkMemoryRequirements2 *req)
+{
+   simple_mtx_lock(&cache->mutex);
+
+   /* Entry might have already been initialized by another thread
+    * before the lock
+    */
+   if (entry->valid)
+      goto unlock;
+
+   entry->requirements.memory = *req;
+
+   const VkMemoryDedicatedRequirements *dedicated_req =
+      vk_find_struct_const(req->pNext, MEMORY_DEDICATED_REQUIREMENTS);
+   if (dedicated_req)
+      entry->requirements.dedicated = *dedicated_req;
+
+   entry->valid = true;
+
+unlock:
+   simple_mtx_unlock(&cache->mutex);
 }

 static void
@ -328,15 +216,22 @@ vn_buffer_init(struct vn_device *dev,
 {
   VkDevice dev_handle = vn_device_to_handle(dev);
   VkBuffer buf_handle = vn_buffer_to_handle(buf);
+   struct vn_buffer_cache *cache = &dev->buffer_cache;
   VkResult result;

-   if (vn_buffer_cache_get_memory_requirements(
-          &dev->buffer_cache, create_info, &buf->requirements)) {
+   /* If cacheable and mem requirements found in cache, make async call */
+   struct vn_buffer_cache_entry *entry =
+      vn_buffer_get_cached_memory_requirements(cache, create_info,
+                                               &buf->requirements);
+
+   /* Check size instead of entry->valid to be lock free */
+   if (buf->requirements.memory.memoryRequirements.size) {
      vn_async_vkCreateBuffer(dev->instance, dev_handle, create_info, NULL,
                              &buf_handle);
      return VK_SUCCESS;
   }

+   /* If cache miss or not cacheable, make synchronous call */
   result = vn_call_vkCreateBuffer(dev->instance, dev_handle, create_info,
                                   NULL, &buf_handle);
   if (result != VK_SUCCESS)
@ -356,6 +251,10 @@ vn_buffer_init(struct vn_device *dev,
      },
      &buf->requirements.memory);

+   /* If cacheable, store mem requirements from the synchronous call */
+   if (entry)
+      vn_buffer_cache_entry_init(cache, entry, &buf->requirements.memory);
+
   return VK_SUCCESS;
 }

@ -566,15 +465,25 @@ vn_GetDeviceBufferMemoryRequirements(
   VkMemoryRequirements2 *pMemoryRequirements)
 {
   struct vn_device *dev = vn_device_from_handle(device);
-   struct vn_buffer_memory_requirements cached;
+   struct vn_buffer_cache *cache = &dev->buffer_cache;
+   struct vn_buffer_memory_requirements reqs = { 0 };

-   if (vn_buffer_cache_get_memory_requirements(&dev->buffer_cache,
-                                               pInfo->pCreateInfo, &cached)) {
-      vn_copy_cached_memory_requirements(&cached, pMemoryRequirements);
+   /* If cacheable and mem requirements found in cache, skip host call */
+   struct vn_buffer_cache_entry *entry =
+      vn_buffer_get_cached_memory_requirements(cache, pInfo->pCreateInfo,
+                                               &reqs);
+
+   /* Check size instead of entry->valid to be lock free */
+   if (reqs.memory.memoryRequirements.size) {
+      vn_copy_cached_memory_requirements(&reqs, pMemoryRequirements);
      return;
   }

-   /* make the host call if not found in cache */
+   /* Make the host call if not found in cache or not cacheable */
   vn_call_vkGetDeviceBufferMemoryRequirements(dev->instance, device, pInfo,
                                               pMemoryRequirements);
+
+   /* If cacheable, store mem requirements from the host call */
+   if (entry)
+      vn_buffer_cache_entry_init(cache, entry, pMemoryRequirements);
 }
--- a/src/virtio/vulkan/vn_buffer.h
+++ b/src/virtio/vulkan/vn_buffer.h
@ -19,9 +19,8 @@ struct vn_buffer_memory_requirements {
 };

 struct vn_buffer_cache_entry {
-   const VkBufferCreateInfo *create_info;
-
   struct vn_buffer_memory_requirements requirements;
+   atomic_bool valid;
 };

 struct vn_buffer_cache {
@ -30,9 +29,9 @@ struct vn_buffer_cache {

   uint64_t max_buffer_size;

-   /* cache memory requirements for common native buffer infos */
-   struct vn_buffer_cache_entry *entries;
-   uint32_t entry_count;
+   /* lazily cache memory requirements for native buffer infos */
+   struct util_sparse_array entries;
+   simple_mtx_t mutex;
 };

 struct vn_buffer {