From 2adea940f1fc155032923cd84fc1cbb51459d0d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= Date: Fri, 5 Jun 2020 12:21:54 +0200 Subject: [PATCH] v3dv/bo: adding a BO cache Heavily based on the existing one for the v3d OpenGL driver, but without references, and with some extra OOM checks (Vulkan CTS has several OOM tests). With this commit v3dv_bo_alloc and v3dv_bo_free become frontends to the bo_cache. The former tries to get a BO from the cache if possible, and the latter stores the BO in the cache if possible. The former also takes a new parameter indicating whether the BO to allocate is private. As in v3d, we only cache private BOs, those created by the driver for internal use (like CLs, tile_alloc, etc). They are the ones with the highest chance of being reused (for example, CL BOs are always 4KB, so they can always be reused). User-created BOs can have any size, including some very large ones for buffers and images, which makes them far less likely to be reused and would add a lot of memory pressure if we decided to cache them. In any case, in practice, we found that we could get a performance improvement by also caching user-created BOs, but that would need more care and an analysis to decide which ones make sense. It would also require changing how the cached BOs are stored by size. Right now there is an array of list_head, which doesn't work well with big BOs. If done, that would be handled in a separate commit. 
Part-of: --- src/broadcom/vulkan/v3dv_bo.c | 275 +++++++++++++++++++--- src/broadcom/vulkan/v3dv_bo.h | 18 +- src/broadcom/vulkan/v3dv_cl.c | 2 +- src/broadcom/vulkan/v3dv_cmd_buffer.c | 4 +- src/broadcom/vulkan/v3dv_descriptor_set.c | 2 +- src/broadcom/vulkan/v3dv_device.c | 4 +- src/broadcom/vulkan/v3dv_meta_copy.c | 2 +- src/broadcom/vulkan/v3dv_pipeline.c | 5 +- src/broadcom/vulkan/v3dv_private.h | 13 + src/broadcom/vulkan/v3dv_query.c | 2 +- src/broadcom/vulkan/v3dv_uniforms.c | 3 +- 11 files changed, 287 insertions(+), 43 deletions(-) diff --git a/src/broadcom/vulkan/v3dv_bo.c b/src/broadcom/vulkan/v3dv_bo.c index 5926ab9f90c..d966cba6932 100644 --- a/src/broadcom/vulkan/v3dv_bo.c +++ b/src/broadcom/vulkan/v3dv_bo.c @@ -29,44 +29,49 @@ #include "drm-uapi/v3d_drm.h" #include "util/u_memory.h" -struct v3dv_bo * -v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name) +static void +bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo) { - struct v3dv_bo *bo = vk_alloc(&device->alloc, sizeof(struct v3dv_bo), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!bo) { - fprintf(stderr, "Failed to allocate host memory for BO\n"); + list_del(&bo->time_list); + list_del(&bo->size_list); +} + +static struct v3dv_bo * +bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name) +{ + struct v3dv_bo_cache *cache = &device->bo_cache; + uint32_t page_index = size / 4096 - 1; + + if (cache->size_list_size <= page_index) return NULL; + + struct v3dv_bo *bo = NULL; + + mtx_lock(&cache->lock); + if (!list_is_empty(&cache->size_list[page_index])) { + bo = list_first_entry(&cache->size_list[page_index], + struct v3dv_bo, size_list); + + /* Check that the BO has gone idle. If not, then we want to + * allocate something new instead, since we assume that the + * user will proceed to CPU map it and fill it with stuff. 
+ */ + if (!v3dv_bo_wait(device, bo, 0)) { + mtx_unlock(&cache->lock); + return NULL; + } + + bo_remove_from_cache(cache, bo); + + bo->name = name; } - - const uint32_t page_align = 4096; /* Always allocate full pages */ - size = align(size, page_align); - struct drm_v3d_create_bo create = { - .size = size - }; - - int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_CREATE_BO, &create); - if (ret != 0) { - fprintf(stderr, "Failed to allocate device memory for BO\n"); - return NULL; - } - - assert(create.offset % page_align == 0); - assert((create.offset & 0xffffffff) == create.offset); - - bo->handle = create.handle; - bo->size = size; - bo->offset = create.offset; - bo->map = NULL; - bo->map_size = 0; - bo->name = name; - list_inithead(&bo->list_link); - + mtx_unlock(&cache->lock); return bo; } -bool -v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo) +static bool +bo_free(struct v3dv_device *device, + struct v3dv_bo *bo) { if (!bo) return true; @@ -81,11 +86,96 @@ v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo) if (ret != 0) fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); + device->bo_count--; + device->bo_size -= bo->size; vk_free(&device->alloc, bo); return ret == 0; } +static void +bo_cache_free_all(struct v3dv_device *device, + bool with_lock) +{ + struct v3dv_bo_cache *cache = &device->bo_cache; + + if (with_lock) + mtx_lock(&cache->lock); + list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list, + time_list) { + bo_remove_from_cache(cache, bo); + bo_free(device, bo); + } + if (with_lock) + mtx_unlock(&cache->lock); + +} + +struct v3dv_bo * +v3dv_bo_alloc(struct v3dv_device *device, + uint32_t size, + const char *name, + bool private) +{ + struct v3dv_bo *bo; + + const uint32_t page_align = 4096; /* Always allocate full pages */ + size = align(size, page_align); + + if (private) { + bo = bo_from_cache(device, size, name); + if (bo) + return bo; + } + + bo = vk_alloc(&device->alloc, sizeof(struct 
v3dv_bo), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + + if (!bo) { + fprintf(stderr, "Failed to allocate host memory for BO\n"); + return NULL; + } + + retry: + ; + + bool cleared_and_retried = false; + struct drm_v3d_create_bo create = { + .size = size + }; + + int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_CREATE_BO, &create); + if (ret != 0) { + if (!list_is_empty(&device->bo_cache.time_list) && + !cleared_and_retried) { + cleared_and_retried = true; + bo_cache_free_all(device, true); + goto retry; + } + + vk_free(&device->alloc, bo); + fprintf(stderr, "Failed to allocate device memory for BO\n"); + return NULL; + } + + assert(create.offset % page_align == 0); + assert((create.offset & 0xffffffff) == create.offset); + + bo->handle = create.handle; + bo->size = size; + bo->offset = create.offset; + bo->map = NULL; + bo->map_size = 0; + bo->name = name; + bo->private = private; + list_inithead(&bo->list_link); + + device->bo_count++; + device->bo_size += bo->size; + + return bo; +} + bool v3dv_bo_map_unsynchronized(struct v3dv_device *device, struct v3dv_bo *bo, @@ -93,6 +183,9 @@ v3dv_bo_map_unsynchronized(struct v3dv_device *device, { assert(bo != NULL && size <= bo->size); + if (bo->map) + return bo->map; + struct drm_v3d_mmap_bo map; memset(&map, 0, sizeof(map)); map.handle = bo->handle; @@ -158,3 +251,121 @@ v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo) bo->map_size = 0; } +static boolean +reallocate_size_list(struct v3dv_bo_cache *cache, + struct v3dv_device *device, + uint32_t size) +{ + struct list_head *new_list = + vk_alloc(&device->alloc, sizeof(struct list_head) * size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + + if (!new_list) { + fprintf(stderr, "Failed to allocate host memory for cache bo list\n"); + return false; + } + struct list_head *old_list = cache->size_list; + + /* Move old list contents over (since the array has moved, and + * therefore the pointers to the list heads have to change). 
+ */ + for (int i = 0; i < cache->size_list_size; i++) { + struct list_head *old_head = &cache->size_list[i]; + if (list_is_empty(old_head)) { + list_inithead(&new_list[i]); + } else { + new_list[i].next = old_head->next; + new_list[i].prev = old_head->prev; + new_list[i].next->prev = &new_list[i]; + new_list[i].prev->next = &new_list[i]; + } + } + for (int i = cache->size_list_size; i < size; i++) + list_inithead(&new_list[i]); + + cache->size_list = new_list; + cache->size_list_size = size; + vk_free(&device->alloc, old_list); + + return true; +} + +void +v3dv_bo_cache_init(struct v3dv_device *device) +{ + device->bo_size = 0; + device->bo_count = 0; + list_inithead(&device->bo_cache.time_list); + /* FIXME: perhaps set a initial size for the size-list, to avoid run-time + * reallocations + */ + device->bo_cache.size_list_size = 0; +} + +void +v3dv_bo_cache_destroy(struct v3dv_device *device) +{ + bo_cache_free_all(device, true); + vk_free(&device->alloc, device->bo_cache.size_list); +} + + +static void +free_stale_bos(struct v3dv_device *device, + time_t time) +{ + struct v3dv_bo_cache *cache = &device->bo_cache; + + list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list, + time_list) { + /* If it's more than a second old, free it. */ + if (time - bo->free_time > 2) { + bo_remove_from_cache(cache, bo); + bo_free(device, bo); + } else { + break; + } + } +} + +bool +v3dv_bo_free(struct v3dv_device *device, + struct v3dv_bo *bo) +{ + if (!bo) + return true; + + struct timespec time; + struct v3dv_bo_cache *cache = &device->bo_cache; + uint32_t page_index = bo->size / 4096 - 1; + + if (!bo->private) + return bo_free(device, bo); + + clock_gettime(CLOCK_MONOTONIC, &time); + mtx_lock(&cache->lock); + + if (cache->size_list_size <= page_index) { + if (!reallocate_size_list(cache, device, page_index + 1)) { + bool outcome = bo_free(device, bo); + /* If the reallocation failed, it usually means that we are out of + * memory, so we also free all the bo cache. 
We need to call it to + * not use the cache lock, as we are already under it. + */ + bo_cache_free_all(device, false); + mtx_unlock(&cache->lock); + return outcome; + } + } + + bo->free_time = time.tv_sec; + list_addtail(&bo->size_list, &cache->size_list[page_index]); + list_addtail(&bo->time_list, &cache->time_list); + bo->name = NULL; + + free_stale_bos(device, time.tv_sec); + + mtx_unlock(&cache->lock); + + return true; +} diff --git a/src/broadcom/vulkan/v3dv_bo.h b/src/broadcom/vulkan/v3dv_bo.h index 270022df7e9..8833a723e51 100644 --- a/src/broadcom/vulkan/v3dv_bo.h +++ b/src/broadcom/vulkan/v3dv_bo.h @@ -37,9 +37,22 @@ struct v3dv_bo { void *map; const char *name; + + /** Entry in the linked list of buffers freed, by age. */ + struct list_head time_list; + /** Entry in the per-page-count linked list of buffers freed (by age). */ + struct list_head size_list; + /** Approximate second when the bo was freed. */ + time_t free_time; + + /** + * Whether only our process has a reference to the BO (meaning that + * it's safe to reuse it in the BO cache). 
+ */ + bool private; }; -struct v3dv_bo *v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name); +struct v3dv_bo *v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name, bool private); bool v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo); @@ -51,4 +64,7 @@ bool v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size); void v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo); +void v3dv_bo_cache_init(struct v3dv_device *device); +void v3dv_bo_cache_destroy(struct v3dv_device *device); + #endif /* V3DV_BO_H */ diff --git a/src/broadcom/vulkan/v3dv_cl.c b/src/broadcom/vulkan/v3dv_cl.c index 27b99ecdae1..03c729a2e05 100644 --- a/src/broadcom/vulkan/v3dv_cl.c +++ b/src/broadcom/vulkan/v3dv_cl.c @@ -51,7 +51,7 @@ v3dv_cl_destroy(struct v3dv_cl *cl) static bool cl_alloc_bo(struct v3dv_cl *cl, uint32_t space, bool use_branch) { - struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL"); + struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL", true); if (!bo) { fprintf(stderr, "failed to allocate memory for command list\n"); v3dv_flag_oom(NULL, cl->job); diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 979da874efd..edbf0e92f72 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -519,7 +519,7 @@ v3dv_job_start_frame(struct v3dv_job *job, tile_alloc_size += 512 * 1024; job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size, - "tile_alloc"); + "tile_alloc", true); if (!job->tile_alloc) { v3dv_flag_oom(NULL, job); return; @@ -532,7 +532,7 @@ v3dv_job_start_frame(struct v3dv_job *job, tiling->draw_tiles_x * tiling->draw_tiles_y * tsda_per_tile_size; - job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA"); + job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true); if (!job->tile_state) { v3dv_flag_oom(NULL, job); return; diff --git 
a/src/broadcom/vulkan/v3dv_descriptor_set.c b/src/broadcom/vulkan/v3dv_descriptor_set.c index 5550536643c..48a742675fa 100644 --- a/src/broadcom/vulkan/v3dv_descriptor_set.c +++ b/src/broadcom/vulkan/v3dv_descriptor_set.c @@ -413,7 +413,7 @@ v3dv_CreateDescriptorPool(VkDevice _device, pool->max_entry_count = pCreateInfo->maxSets; if (bo_size > 0) { - pool->bo = v3dv_bo_alloc(device, bo_size, "descriptor pool bo"); + pool->bo = v3dv_bo_alloc(device, bo_size, "descriptor pool bo", true); if (!pool->bo) goto out_of_device_memory; diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 1aa9ae96fe7..a8ed8c80879 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -1286,6 +1286,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, init_device_dispatch(device); init_device_meta(device); + v3dv_bo_cache_init(device); *pDevice = v3dv_device_to_handle(device); @@ -1308,6 +1309,7 @@ v3dv_DestroyDevice(VkDevice _device, pthread_mutex_destroy(&device->mutex); drmSyncobjDestroy(device->render_fd, device->last_job_sync); destroy_device_meta(device); + v3dv_bo_cache_destroy(device); vk_free2(&default_alloc, pAllocator, device); } @@ -1362,7 +1364,7 @@ device_alloc(struct v3dv_device *device, { /* Our kernel interface is 32-bit */ assert((size & 0xffffffff) == size); - mem->bo = v3dv_bo_alloc(device, size, "device_alloc"); + mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false); if (!mem->bo) return VK_ERROR_OUT_OF_DEVICE_MEMORY; return VK_SUCCESS; diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c index 90aec1a0410..92f0b7f82b2 100644 --- a/src/broadcom/vulkan/v3dv_meta_copy.c +++ b/src/broadcom/vulkan/v3dv_meta_copy.c @@ -1787,7 +1787,7 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer); struct v3dv_bo *src_bo = - v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer"); + 
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true); if (!src_bo) { fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n"); return; diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 80c74960a51..32addf496b8 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -1160,7 +1160,7 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage, break; }; - struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name); + struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name, true); if (!bo) { fprintf(stderr, "failed to allocate memory for shader\n"); return false; @@ -2262,7 +2262,8 @@ create_default_attribute_values(struct v3dv_pipeline *pipeline, if (pipeline->default_attribute_values == NULL) { pipeline->default_attribute_values = v3dv_bo_alloc(pipeline->device, size, - "default_vi_attributes"); + "default_vi_attributes", + true); if (!pipeline->default_attribute_values) { fprintf(stderr, "failed to allocate memory for the default " diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index c94c59984db..f805a5091d1 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -290,6 +290,19 @@ struct v3dv_device { struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */ } blit; } meta; + + struct v3dv_bo_cache { + /** List of struct v3d_bo freed, by age. */ + struct list_head time_list; + /** List of struct v3d_bo freed, per size, by age. 
*/ + struct list_head *size_list; + uint32_t size_list_size; + + mtx_t lock; + } bo_cache; + + uint32_t bo_size; + uint32_t bo_count; }; struct v3dv_device_memory { diff --git a/src/broadcom/vulkan/v3dv_query.c b/src/broadcom/vulkan/v3dv_query.c index 2d7f40c2d50..7c2ce104365 100644 --- a/src/broadcom/vulkan/v3dv_query.c +++ b/src/broadcom/vulkan/v3dv_query.c @@ -59,7 +59,7 @@ v3dv_CreateQueryPool(VkDevice _device, uint32_t i; for (i = 0; i < pool->query_count; i++) { pool->queries[i].maybe_available = false; - pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query"); + pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true); if (!pool->queries[i].bo) { result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); goto fail_alloc_bo; diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c index 3cbbb4df483..af28c6f1af3 100644 --- a/src/broadcom/vulkan/v3dv_uniforms.c +++ b/src/broadcom/vulkan/v3dv_uniforms.c @@ -48,7 +48,8 @@ check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer) if (cmd_buffer->push_constants_resource.bo == NULL) { cmd_buffer->push_constants_resource.bo = - v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE, "push constants"); + v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE, + "push constants", true); if (!cmd_buffer->push_constants_resource.bo) { fprintf(stderr, "Failed to allocate memory for push constants\n");