venus: switch to lazy VkBuffer cache

Instead of creating a static VkBuffer cache at init with hardcoded
CreateInfo's, lazily cache VkBuffers that the app requests.

Signed-off-by: Juston Li <justonli@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21324>
This commit is contained in:
Juston Li 2023-02-08 17:01:31 -08:00 committed by Marge Bot
parent 56a9aad401
commit ef255444c1
2 changed files with 85 additions and 177 deletions

View file

@ -20,142 +20,16 @@
/* buffer commands */
/* mandatory buffer create infos to cache */
static const VkBufferCreateInfo cache_infos[] = {
{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = 1,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
},
{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = 1,
.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
},
{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = 1,
.usage =
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
},
{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = 1,
.usage =
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
},
{
/* mainly for layering clients like angle and zink */
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = 1,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
},
};
static inline bool
vn_buffer_create_info_can_be_cached(const VkBufferCreateInfo *create_info)
vn_buffer_create_info_can_be_cached(const VkBufferCreateInfo *create_info,
struct vn_buffer_cache *cache)
{
/* cache only VK_SHARING_MODE_EXCLUSIVE and without pNext for simplicity */
return (create_info->pNext == NULL) &&
return (create_info->size <= cache->max_buffer_size) &&
(create_info->pNext == NULL) &&
(create_info->sharingMode == VK_SHARING_MODE_EXCLUSIVE);
}
static VkResult
vn_buffer_cache_entries_create(struct vn_device *dev,
struct vn_buffer_cache_entry **out_entries,
uint32_t *out_entry_count)
{
const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
const struct vk_device_extension_table *app_exts =
&dev->base.base.enabled_extensions;
VkDevice dev_handle = vn_device_to_handle(dev);
struct vn_buffer_cache_entry *entries;
const uint32_t entry_count = ARRAY_SIZE(cache_infos);
VkResult result;
entries = vk_zalloc(alloc, sizeof(*entries) * entry_count,
VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!entries)
return VK_ERROR_OUT_OF_HOST_MEMORY;
for (uint32_t i = 0; i < entry_count; i++) {
VkBuffer buf_handle = VK_NULL_HANDLE;
struct vn_buffer *buf = NULL;
VkBufferCreateInfo local_info = cache_infos[i];
assert(vn_buffer_create_info_can_be_cached(&cache_infos[i]));
/* We mask out usage bits from exts not enabled by the app to create the
* buffer. To be noted, we'll still set cache entry create_info to the
* unmasked one for code simplicity, and it's fine to use a superset.
*/
if (!app_exts->EXT_transform_feedback) {
local_info.usage &=
~(VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT);
}
if (!app_exts->EXT_conditional_rendering)
local_info.usage &= ~VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
/* TODO check feature enablement instead */
if (!app_exts->KHR_buffer_device_address)
local_info.usage &= ~VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
result = vn_CreateBuffer(dev_handle, &local_info, alloc, &buf_handle);
if (result != VK_SUCCESS) {
vk_free(alloc, entries);
return result;
}
buf = vn_buffer_from_handle(buf_handle);
/* TODO remove below after VK_KHR_maintenance4 becomes a requirement */
if (buf->requirements.memory.memoryRequirements.alignment <
buf->requirements.memory.memoryRequirements.size) {
vk_free(alloc, entries);
*out_entries = NULL;
*out_entry_count = 0;
return VK_SUCCESS;
}
entries[i].create_info = &cache_infos[i];
entries[i].requirements.memory = buf->requirements.memory;
entries[i].requirements.dedicated = buf->requirements.dedicated;
vn_DestroyBuffer(dev_handle, buf_handle, alloc);
}
*out_entries = entries;
*out_entry_count = entry_count;
return VK_SUCCESS;
}
static void
vn_buffer_cache_entries_destroy(struct vn_device *dev,
struct vn_buffer_cache_entry *entries)
{
const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
if (entries)
vk_free(alloc, entries);
}
static VkResult
vn_buffer_get_max_buffer_size(struct vn_device *dev,
uint64_t *out_max_buffer_size)
@ -205,8 +79,6 @@ vn_buffer_cache_init(struct vn_device *dev)
{
uint32_t ahb_mem_type_bits = 0;
uint64_t max_buffer_size = 0;
struct vn_buffer_cache_entry *entries = NULL;
uint32_t entry_count = 0;
VkResult result;
if (dev->base.base.enabled_extensions
@ -221,39 +93,33 @@ vn_buffer_cache_init(struct vn_device *dev)
result = vn_buffer_get_max_buffer_size(dev, &max_buffer_size);
if (result != VK_SUCCESS)
return result;
result = vn_buffer_cache_entries_create(dev, &entries, &entry_count);
if (result != VK_SUCCESS)
return result;
}
dev->buffer_cache.ahb_mem_type_bits = ahb_mem_type_bits;
dev->buffer_cache.max_buffer_size = max_buffer_size;
dev->buffer_cache.entries = entries;
dev->buffer_cache.entry_count = entry_count;
simple_mtx_init(&dev->buffer_cache.mutex, mtx_plain);
util_sparse_array_init(&dev->buffer_cache.entries,
sizeof(struct vn_buffer_cache_entry), 64);
return VK_SUCCESS;
}
void
vn_buffer_cache_fini(struct vn_device *dev)
{
vn_buffer_cache_entries_destroy(dev, dev->buffer_cache.entries);
util_sparse_array_finish(&dev->buffer_cache.entries);
simple_mtx_destroy(&dev->buffer_cache.mutex);
}
static bool
vn_buffer_cache_get_memory_requirements(
static struct vn_buffer_cache_entry *
vn_buffer_get_cached_memory_requirements(
struct vn_buffer_cache *cache,
const VkBufferCreateInfo *create_info,
struct vn_buffer_memory_requirements *out)
{
if (VN_PERF(NO_ASYNC_BUFFER_CREATE))
return false;
if (create_info->size > cache->max_buffer_size)
return false;
if (!vn_buffer_create_info_can_be_cached(create_info))
return false;
return NULL;
/* 12.7. Resource Memory Association
*
@ -261,35 +127,57 @@ vn_buffer_cache_get_memory_requirements(
* with the same value for the flags and usage members in the
* VkBufferCreateInfo structure and the handleTypes member of the
* VkExternalMemoryBufferCreateInfo structure passed to vkCreateBuffer.
* Further, if usage1 and usage2 of type VkBufferUsageFlags are such that
* the bits set in usage2 are a subset of the bits set in usage1, and they
* have the same flags and VkExternalMemoryBufferCreateInfo::handleTypes,
* then the bits set in memoryTypeBits returned for usage1 must be a subset
* of the bits set in memoryTypeBits returned for usage2, for all values of
* flags.
*/
for (uint32_t i = 0; i < cache->entry_count; i++) {
const struct vn_buffer_cache_entry *entry = &cache->entries[i];
// TODO: Fix the spec regarding the usage and alignment behavior
if ((entry->create_info->flags == create_info->flags) &&
((entry->create_info->usage & create_info->usage) ==
create_info->usage)) {
if (vn_buffer_create_info_can_be_cached(create_info, cache)) {
/* Combine flags and usage bits to form a unique index. */
const uint64_t idx =
(uint64_t)create_info->flags << 32 | create_info->usage;
struct vn_buffer_cache_entry *entry =
util_sparse_array_get(&cache->entries, idx);
if (entry->valid) {
*out = entry->requirements;
/* TODO remove the comment after VK_KHR_maintenance4 becomes a
* requirement
/* TODO remove comment after mandating VK_KHR_maintenance4
*
* This is based on below implementation defined behavior:
*
* req.size <= align64(info.size, req.alignment)
*/
out->memory.memoryRequirements.size = align64(
create_info->size, out->memory.memoryRequirements.alignment);
return true;
}
return entry;
}
return false;
return NULL;
}
static void
vn_buffer_cache_entry_init(struct vn_buffer_cache *cache,
struct vn_buffer_cache_entry *entry,
VkMemoryRequirements2 *req)
{
simple_mtx_lock(&cache->mutex);
/* Entry might have already been initialized by another thread
* before the lock
*/
if (entry->valid)
goto unlock;
entry->requirements.memory = *req;
const VkMemoryDedicatedRequirements *dedicated_req =
vk_find_struct_const(req->pNext, MEMORY_DEDICATED_REQUIREMENTS);
if (dedicated_req)
entry->requirements.dedicated = *dedicated_req;
entry->valid = true;
unlock:
simple_mtx_unlock(&cache->mutex);
}
static void
@ -328,15 +216,22 @@ vn_buffer_init(struct vn_device *dev,
{
VkDevice dev_handle = vn_device_to_handle(dev);
VkBuffer buf_handle = vn_buffer_to_handle(buf);
struct vn_buffer_cache *cache = &dev->buffer_cache;
VkResult result;
if (vn_buffer_cache_get_memory_requirements(
&dev->buffer_cache, create_info, &buf->requirements)) {
/* If cacheable and mem requirements found in cache, make async call */
struct vn_buffer_cache_entry *entry =
vn_buffer_get_cached_memory_requirements(cache, create_info,
&buf->requirements);
/* Check size instead of entry->valid to be lock free */
if (buf->requirements.memory.memoryRequirements.size) {
vn_async_vkCreateBuffer(dev->instance, dev_handle, create_info, NULL,
&buf_handle);
return VK_SUCCESS;
}
/* If cache miss or not cacheable, make synchronous call */
result = vn_call_vkCreateBuffer(dev->instance, dev_handle, create_info,
NULL, &buf_handle);
if (result != VK_SUCCESS)
@ -356,6 +251,10 @@ vn_buffer_init(struct vn_device *dev,
},
&buf->requirements.memory);
/* If cacheable, store mem requirements from the synchronous call */
if (entry)
vn_buffer_cache_entry_init(cache, entry, &buf->requirements.memory);
return VK_SUCCESS;
}
@ -566,15 +465,25 @@ vn_GetDeviceBufferMemoryRequirements(
VkMemoryRequirements2 *pMemoryRequirements)
{
struct vn_device *dev = vn_device_from_handle(device);
struct vn_buffer_memory_requirements cached;
struct vn_buffer_cache *cache = &dev->buffer_cache;
struct vn_buffer_memory_requirements reqs = { 0 };
if (vn_buffer_cache_get_memory_requirements(&dev->buffer_cache,
pInfo->pCreateInfo, &cached)) {
vn_copy_cached_memory_requirements(&cached, pMemoryRequirements);
/* If cacheable and mem requirements found in cache, skip host call */
struct vn_buffer_cache_entry *entry =
vn_buffer_get_cached_memory_requirements(cache, pInfo->pCreateInfo,
&reqs);
/* Check size instead of entry->valid to be lock free */
if (reqs.memory.memoryRequirements.size) {
vn_copy_cached_memory_requirements(&reqs, pMemoryRequirements);
return;
}
/* make the host call if not found in cache */
/* Make the host call if not found in cache or not cacheable */
vn_call_vkGetDeviceBufferMemoryRequirements(dev->instance, device, pInfo,
pMemoryRequirements);
/* If cacheable, store mem requirements from the host call */
if (entry)
vn_buffer_cache_entry_init(cache, entry, pMemoryRequirements);
}

View file

@ -19,9 +19,8 @@ struct vn_buffer_memory_requirements {
};
struct vn_buffer_cache_entry {
const VkBufferCreateInfo *create_info;
struct vn_buffer_memory_requirements requirements;
atomic_bool valid;
};
struct vn_buffer_cache {
@ -30,9 +29,9 @@ struct vn_buffer_cache {
uint64_t max_buffer_size;
/* cache memory requirements for common native buffer infos */
struct vn_buffer_cache_entry *entries;
uint32_t entry_count;
/* lazily cache memory requirements for native buffer infos */
struct util_sparse_array entries;
simple_mtx_t mutex;
};
struct vn_buffer {