v3dv/bo: adding a BO cache

Heavily based on the one that already exists for the v3d OpenGL driver, but
without references, and with some extra OOM checks (Vulkan CTS has
several OOM tests).

With this commit v3dv_bo_alloc and v3dv_bo_free become frontends to
the bo_cache. The former tries to get a BO from the cache if possible,
and the latter stores the BO in the cache if possible. The former also
gains a new parameter to indicate whether the BO to allocate is private.

Like v3d, we are only caching private BOs, those created by the driver
for internal use (like CLs, tile_alloc, etc). They are the ones with
the highest chance of being reused (for example, CL BOs are always
4KB, so they can always be reused). User-created BOs can have any
size, including some very large ones for buffers and images, which
makes them far less likely to be reused and would add a lot of memory
pressure if we decided to cache them.

In any case, in practice, we found that we could get a performance
improvement by also caching user-created BOs, but that would need more
care and an analysis to decide which ones make sense. It would also
require changing how the cached BOs are stored by size. Right now
there is an array of list_head, which doesn't work well with big
BOs. If done, that would be handled in a separate commit.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
This commit is contained in:
Alejandro Piñeiro 2020-06-05 12:21:54 +02:00 committed by Marge Bot
parent e1a11b8154
commit 2adea940f1
11 changed files with 287 additions and 43 deletions

View file

@ -29,44 +29,49 @@
#include "drm-uapi/v3d_drm.h"
#include "util/u_memory.h"
struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name)
static void
bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
{
struct v3dv_bo *bo = vk_alloc(&device->alloc, sizeof(struct v3dv_bo), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!bo) {
fprintf(stderr, "Failed to allocate host memory for BO\n");
list_del(&bo->time_list);
list_del(&bo->size_list);
}
static struct v3dv_bo *
bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
{
struct v3dv_bo_cache *cache = &device->bo_cache;
uint32_t page_index = size / 4096 - 1;
if (cache->size_list_size <= page_index)
return NULL;
struct v3dv_bo *bo = NULL;
mtx_lock(&cache->lock);
if (!list_is_empty(&cache->size_list[page_index])) {
bo = list_first_entry(&cache->size_list[page_index],
struct v3dv_bo, size_list);
/* Check that the BO has gone idle. If not, then we want to
* allocate something new instead, since we assume that the
* user will proceed to CPU map it and fill it with stuff.
*/
if (!v3dv_bo_wait(device, bo, 0)) {
mtx_unlock(&cache->lock);
return NULL;
}
bo_remove_from_cache(cache, bo);
bo->name = name;
}
const uint32_t page_align = 4096; /* Always allocate full pages */
size = align(size, page_align);
struct drm_v3d_create_bo create = {
.size = size
};
int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_CREATE_BO, &create);
if (ret != 0) {
fprintf(stderr, "Failed to allocate device memory for BO\n");
return NULL;
}
assert(create.offset % page_align == 0);
assert((create.offset & 0xffffffff) == create.offset);
bo->handle = create.handle;
bo->size = size;
bo->offset = create.offset;
bo->map = NULL;
bo->map_size = 0;
bo->name = name;
list_inithead(&bo->list_link);
mtx_unlock(&cache->lock);
return bo;
}
bool
v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo)
static bool
bo_free(struct v3dv_device *device,
struct v3dv_bo *bo)
{
if (!bo)
return true;
@ -81,11 +86,96 @@ v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo)
if (ret != 0)
fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
device->bo_count--;
device->bo_size -= bo->size;
vk_free(&device->alloc, bo);
return ret == 0;
}
/**
 * Releases every BO currently held in the device's BO cache.
 *
 * Walks the age-ordered list, unlinking each cached BO from the cache lists
 * and handing it to bo_free().
 *
 * @param device     Device that owns the BO cache.
 * @param with_lock  When true, the cache lock is taken around the walk.
 *                   Pass false when the caller already holds the lock.
 */
static void
bo_cache_free_all(struct v3dv_device *device,
                  bool with_lock)
{
   struct v3dv_bo_cache *bo_cache = &device->bo_cache;

   if (with_lock) {
      mtx_lock(&bo_cache->lock);
   }

   /* The _safe iterator is required: each entry is unlinked and freed
    * while the list is being traversed.
    */
   list_for_each_entry_safe(struct v3dv_bo, cached, &bo_cache->time_list,
                            time_list) {
      bo_remove_from_cache(bo_cache, cached);
      bo_free(device, cached);
   }

   if (with_lock) {
      mtx_unlock(&bo_cache->lock);
   }
}
/**
 * Allocates a BO of at least `size` bytes, rounded up to whole 4096-byte
 * pages.
 *
 * For private BOs the BO cache is consulted first and a cached BO is
 * returned when one is available. Otherwise a new BO is created through
 * the DRM_IOCTL_V3D_CREATE_BO ioctl. If that ioctl fails while the cache
 * still holds BOs, the cache is emptied and the ioctl is retried exactly
 * once.
 *
 * @param device   Device to allocate from.
 * @param size     Requested size in bytes (rounded up to a page multiple).
 * @param name     Name stored in the BO; the pointer is stored as-is.
 * @param private  Whether the BO may be served from the BO cache; also
 *                 recorded in the newly created BO.
 * @return The BO, or NULL if host or device memory could not be allocated.
 */
struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device *device,
              uint32_t size,
              const char *name,
              bool private)
{
   struct v3dv_bo *bo;

   const uint32_t page_align = 4096; /* Always allocate full pages */
   size = align(size, page_align);

   if (private) {
      bo = bo_from_cache(device, size, name);
      if (bo)
         return bo;
   }

   bo = vk_alloc(&device->alloc, sizeof(struct v3dv_bo), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!bo) {
      fprintf(stderr, "Failed to allocate host memory for BO\n");
      return NULL;
   }

   /* Must live outside the retry label: if it were declared after the
    * label, jumping back would re-initialize it to false and the
    * "retry only once" guard would never take effect.
    */
   bool cleared_and_retried = false;

 retry:
   ;
   /* Re-initialized on every attempt so the ioctl always sees a clean
    * request structure.
    */
   struct drm_v3d_create_bo create = {
      .size = size
   };

   int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_CREATE_BO, &create);
   if (ret != 0) {
      /* Creation failed: release whatever the cache is holding and give
       * the kernel one more chance before reporting the failure.
       * NOTE(review): time_list is inspected here without holding the
       * cache lock.
       */
      if (!list_is_empty(&device->bo_cache.time_list) &&
          !cleared_and_retried) {
         cleared_and_retried = true;
         bo_cache_free_all(device, true);
         goto retry;
      }

      vk_free(&device->alloc, bo);
      fprintf(stderr, "Failed to allocate device memory for BO\n");
      return NULL;
   }

   assert(create.offset % page_align == 0);
   assert((create.offset & 0xffffffff) == create.offset);

   bo->handle = create.handle;
   bo->size = size;
   bo->offset = create.offset;
   bo->map = NULL;
   bo->map_size = 0;
   bo->name = name;
   bo->private = private;
   list_inithead(&bo->list_link);

   /* Device-wide accounting of live BOs. */
   device->bo_count++;
   device->bo_size += bo->size;

   return bo;
}
bool
v3dv_bo_map_unsynchronized(struct v3dv_device *device,
struct v3dv_bo *bo,
@ -93,6 +183,9 @@ v3dv_bo_map_unsynchronized(struct v3dv_device *device,
{
assert(bo != NULL && size <= bo->size);
if (bo->map)
return bo->map;
struct drm_v3d_mmap_bo map;
memset(&map, 0, sizeof(map));
map.handle = bo->handle;
@ -158,3 +251,121 @@ v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
bo->map_size = 0;
}
/**
 * Grows the cache's array of per-size list heads to `size` entries.
 *
 * A new array is allocated, the existing lists are re-anchored onto the new
 * heads, the additional heads are initialized empty, and the old array is
 * freed. On allocation failure the cache is left untouched.
 *
 * @param cache   BO cache whose size_list array is replaced.
 * @param device  Device whose allocator is used.
 * @param size    New number of list heads. The visible caller only passes
 *                a value larger than cache->size_list_size.
 * @return true on success, false if host memory could not be allocated.
 */
static bool
reallocate_size_list(struct v3dv_bo_cache *cache,
                     struct v3dv_device *device,
                     uint32_t size)
{
   struct list_head *new_list =
      vk_alloc(&device->alloc, sizeof(struct list_head) * size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!new_list) {
      fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
      return false;
   }
   struct list_head *old_list = cache->size_list;

   /* Move old list contents over (since the array has moved, and
    * therefore the pointers to the list heads have to change).
    */
   for (uint32_t i = 0; i < cache->size_list_size; i++) {
      struct list_head *old_head = &cache->size_list[i];
      if (list_is_empty(old_head)) {
         list_inithead(&new_list[i]);
      } else {
         /* Splice the new head in place of the old one: copy its links
          * and make the first/last entries point back at the new head.
          */
         new_list[i].next = old_head->next;
         new_list[i].prev = old_head->prev;
         new_list[i].next->prev = &new_list[i];
         new_list[i].prev->next = &new_list[i];
      }
   }

   /* Heads beyond the old array size start out empty. */
   for (uint32_t i = cache->size_list_size; i < size; i++)
      list_inithead(&new_list[i]);

   cache->size_list = new_list;
   cache->size_list_size = size;
   vk_free(&device->alloc, old_list);

   return true;
}
/**
 * Initializes the device's BO accounting and its BO cache.
 *
 * Resets the BO counters, sets up an empty age-ordered list and an empty
 * per-size list array, and initializes the mutex that the cache functions
 * lock.
 *
 * @param device  Device whose BO cache is initialized.
 */
void
v3dv_bo_cache_init(struct v3dv_device *device)
{
   device->bo_size = 0;
   device->bo_count = 0;
   list_inithead(&device->bo_cache.time_list);
   /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
    * reallocations
    */
   device->bo_cache.size_list_size = 0;
   /* The array pointer is later handed to vk_free() (on growth and on
    * destroy), so it must hold a well-defined value while empty.
    */
   device->bo_cache.size_list = NULL;

   /* The cache lock has to be initialized before its first mtx_lock(). */
   mtx_init(&device->bo_cache.lock, mtx_plain);
}
void
v3dv_bo_cache_destroy(struct v3dv_device *device)
{
bo_cache_free_all(device, true);
vk_free(&device->alloc, device->bo_cache.size_list);
}
/**
 * Frees cached BOs that have been sitting in the cache for too long.
 *
 * BOs are appended to the tail of the age-ordered list when they enter the
 * cache, so the walk starts at the oldest entry and stops at the first one
 * that is still fresh. The visible caller invokes this with the cache lock
 * held.
 *
 * @param device  Device that owns the BO cache.
 * @param time    Current time in seconds, compared against each BO's
 *                free_time.
 */
static void
free_stale_bos(struct v3dv_device *device,
               time_t time)
{
   struct v3dv_bo_cache *bo_cache = &device->bo_cache;

   list_for_each_entry_safe(struct v3dv_bo, candidate, &bo_cache->time_list,
                            time_list) {
      /* Entries freed no more than two seconds ago are kept; everything
       * after the first such entry is at least as recent, so stop here.
       */
      if (time - candidate->free_time <= 2)
         break;

      bo_remove_from_cache(bo_cache, candidate);
      bo_free(device, candidate);
   }
}
/**
 * Releases a BO, parking it in the BO cache when it is private.
 *
 * Non-private BOs are freed immediately. Private BOs are stamped with the
 * current monotonic time and appended to both the per-size list and the
 * age-ordered list, after which stale cache entries are pruned. If the
 * per-size list array cannot be grown to fit the BO, the BO and the whole
 * cache are freed instead.
 *
 * @param device  Device that owns the BO.
 * @param bo      BO to release; NULL is accepted and treated as success.
 * @return true if the BO was cached or was NULL; otherwise the result of
 *         bo_free().
 */
bool
v3dv_bo_free(struct v3dv_device *device,
             struct v3dv_bo *bo)
{
   if (bo == NULL)
      return true;

   /* Only private BOs go through the cache. */
   if (!bo->private)
      return bo_free(device, bo);

   struct v3dv_bo_cache *bo_cache = &device->bo_cache;
   /* The per-size lists are indexed by page count minus one. */
   const uint32_t bucket = bo->size / 4096 - 1;

   struct timespec now;
   clock_gettime(CLOCK_MONOTONIC, &now);

   mtx_lock(&bo_cache->lock);

   if (bucket >= bo_cache->size_list_size &&
       !reallocate_size_list(bo_cache, device, bucket + 1)) {
      const bool freed = bo_free(device, bo);
      /* A failed reallocation usually means we are out of memory, so the
       * entire cache is released as well. The lock is already held here,
       * hence the unlocked variant.
       */
      bo_cache_free_all(device, false);
      mtx_unlock(&bo_cache->lock);
      return freed;
   }

   bo->free_time = now.tv_sec;
   list_addtail(&bo->size_list, &bo_cache->size_list[bucket]);
   list_addtail(&bo->time_list, &bo_cache->time_list);
   bo->name = NULL;

   free_stale_bos(device, now.tv_sec);

   mtx_unlock(&bo_cache->lock);

   return true;
}

View file

@ -37,9 +37,22 @@ struct v3dv_bo {
void *map;
const char *name;
/** Entry in the linked list of buffers freed, by age. */
struct list_head time_list;
/** Entry in the per-page-count linked list of buffers freed (by age). */
struct list_head size_list;
/** Approximate second when the bo was freed. */
time_t free_time;
/**
* Whether only our process has a reference to the BO (meaning that
* it's safe to reuse it in the BO cache).
*/
bool private;
};
struct v3dv_bo *v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name);
struct v3dv_bo *v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name, bool private);
bool v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo);
@ -51,4 +64,7 @@ bool v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size);
void v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo);
void v3dv_bo_cache_init(struct v3dv_device *device);
void v3dv_bo_cache_destroy(struct v3dv_device *device);
#endif /* V3DV_BO_H */

View file

@ -51,7 +51,7 @@ v3dv_cl_destroy(struct v3dv_cl *cl)
static bool
cl_alloc_bo(struct v3dv_cl *cl, uint32_t space, bool use_branch)
{
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL");
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL", true);
if (!bo) {
fprintf(stderr, "failed to allocate memory for command list\n");
v3dv_flag_oom(NULL, cl->job);

View file

@ -519,7 +519,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
tile_alloc_size += 512 * 1024;
job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
"tile_alloc");
"tile_alloc", true);
if (!job->tile_alloc) {
v3dv_flag_oom(NULL, job);
return;
@ -532,7 +532,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
tiling->draw_tiles_x *
tiling->draw_tiles_y *
tsda_per_tile_size;
job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA");
job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true);
if (!job->tile_state) {
v3dv_flag_oom(NULL, job);
return;

View file

@ -413,7 +413,7 @@ v3dv_CreateDescriptorPool(VkDevice _device,
pool->max_entry_count = pCreateInfo->maxSets;
if (bo_size > 0) {
pool->bo = v3dv_bo_alloc(device, bo_size, "descriptor pool bo");
pool->bo = v3dv_bo_alloc(device, bo_size, "descriptor pool bo", true);
if (!pool->bo)
goto out_of_device_memory;

View file

@ -1286,6 +1286,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
init_device_dispatch(device);
init_device_meta(device);
v3dv_bo_cache_init(device);
*pDevice = v3dv_device_to_handle(device);
@ -1308,6 +1309,7 @@ v3dv_DestroyDevice(VkDevice _device,
pthread_mutex_destroy(&device->mutex);
drmSyncobjDestroy(device->render_fd, device->last_job_sync);
destroy_device_meta(device);
v3dv_bo_cache_destroy(device);
vk_free2(&default_alloc, pAllocator, device);
}
@ -1362,7 +1364,7 @@ device_alloc(struct v3dv_device *device,
{
/* Our kernel interface is 32-bit */
assert((size & 0xffffffff) == size);
mem->bo = v3dv_bo_alloc(device, size, "device_alloc");
mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false);
if (!mem->bo)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
return VK_SUCCESS;

View file

@ -1787,7 +1787,7 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
struct v3dv_bo *src_bo =
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer");
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
if (!src_bo) {
fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
return;

View file

@ -1160,7 +1160,7 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage,
break;
};
struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name);
struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name, true);
if (!bo) {
fprintf(stderr, "failed to allocate memory for shader\n");
return false;
@ -2262,7 +2262,8 @@ create_default_attribute_values(struct v3dv_pipeline *pipeline,
if (pipeline->default_attribute_values == NULL) {
pipeline->default_attribute_values = v3dv_bo_alloc(pipeline->device, size,
"default_vi_attributes");
"default_vi_attributes",
true);
if (!pipeline->default_attribute_values) {
fprintf(stderr, "failed to allocate memory for the default "

View file

@ -290,6 +290,19 @@ struct v3dv_device {
struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
} blit;
} meta;
struct v3dv_bo_cache {
/** List of struct v3dv_bo freed, by age. */
struct list_head time_list;
/** List of struct v3dv_bo freed, per size, by age. */
struct list_head *size_list;
uint32_t size_list_size;
mtx_t lock;
} bo_cache;
uint32_t bo_size;
uint32_t bo_count;
};
struct v3dv_device_memory {

View file

@ -59,7 +59,7 @@ v3dv_CreateQueryPool(VkDevice _device,
uint32_t i;
for (i = 0; i < pool->query_count; i++) {
pool->queries[i].maybe_available = false;
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query");
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
if (!pool->queries[i].bo) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;

View file

@ -48,7 +48,8 @@ check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer)
if (cmd_buffer->push_constants_resource.bo == NULL) {
cmd_buffer->push_constants_resource.bo =
v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE, "push constants");
v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE,
"push constants", true);
if (!cmd_buffer->push_constants_resource.bo) {
fprintf(stderr, "Failed to allocate memory for push constants\n");