mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 12:40:09 +01:00
v3dv/bo: adding a BO cache
Heavily based on the BO cache that already exists in the v3d OpenGL driver, but without reference counting, and with some extra OOM checks (the Vulkan CTS has several OOM tests). With this commit v3dv_bo_alloc and v3dv_bo_free become frontends to the bo_cache. The former tries to get a BO from the cache if possible, and the latter stores the BO in the cache if possible. The former also gains a new parameter to indicate whether the BO to allocate is private. As in v3d, we only cache private BOs, i.e. those created by the driver for internal use (like CLs, tile_alloc, etc). They are the ones with the highest chance of being reused (for example, CL BOs are always 4KB, so they can always be reused). User-created BOs can have any size, including some very large ones for buffers and images, which makes them far less likely to be reused and would add a lot of memory pressure if we decided to cache them. In any case, in practice, we found that we could get a performance improvement by also caching user-created BOs, but that would need more care and an analysis to decide which ones make sense. It would also require changing how the cached BOs are stored by size. Right now there is an array of list_head indexed by page count, which doesn't work well with big BOs. If done, that would be handled in a separate commit. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
This commit is contained in:
parent
e1a11b8154
commit
2adea940f1
11 changed files with 287 additions and 43 deletions
|
|
@ -29,44 +29,49 @@
|
|||
#include "drm-uapi/v3d_drm.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
struct v3dv_bo *
|
||||
v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name)
|
||||
static void
|
||||
bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
|
||||
{
|
||||
struct v3dv_bo *bo = vk_alloc(&device->alloc, sizeof(struct v3dv_bo), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!bo) {
|
||||
fprintf(stderr, "Failed to allocate host memory for BO\n");
|
||||
list_del(&bo->time_list);
|
||||
list_del(&bo->size_list);
|
||||
}
|
||||
|
||||
static struct v3dv_bo *
|
||||
bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
|
||||
{
|
||||
struct v3dv_bo_cache *cache = &device->bo_cache;
|
||||
uint32_t page_index = size / 4096 - 1;
|
||||
|
||||
if (cache->size_list_size <= page_index)
|
||||
return NULL;
|
||||
|
||||
struct v3dv_bo *bo = NULL;
|
||||
|
||||
mtx_lock(&cache->lock);
|
||||
if (!list_is_empty(&cache->size_list[page_index])) {
|
||||
bo = list_first_entry(&cache->size_list[page_index],
|
||||
struct v3dv_bo, size_list);
|
||||
|
||||
/* Check that the BO has gone idle. If not, then we want to
|
||||
* allocate something new instead, since we assume that the
|
||||
* user will proceed to CPU map it and fill it with stuff.
|
||||
*/
|
||||
if (!v3dv_bo_wait(device, bo, 0)) {
|
||||
mtx_unlock(&cache->lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bo_remove_from_cache(cache, bo);
|
||||
|
||||
bo->name = name;
|
||||
}
|
||||
|
||||
const uint32_t page_align = 4096; /* Always allocate full pages */
|
||||
size = align(size, page_align);
|
||||
struct drm_v3d_create_bo create = {
|
||||
.size = size
|
||||
};
|
||||
|
||||
int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_CREATE_BO, &create);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Failed to allocate device memory for BO\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert(create.offset % page_align == 0);
|
||||
assert((create.offset & 0xffffffff) == create.offset);
|
||||
|
||||
bo->handle = create.handle;
|
||||
bo->size = size;
|
||||
bo->offset = create.offset;
|
||||
bo->map = NULL;
|
||||
bo->map_size = 0;
|
||||
bo->name = name;
|
||||
list_inithead(&bo->list_link);
|
||||
|
||||
mtx_unlock(&cache->lock);
|
||||
return bo;
|
||||
}
|
||||
|
||||
bool
|
||||
v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo)
|
||||
static bool
|
||||
bo_free(struct v3dv_device *device,
|
||||
struct v3dv_bo *bo)
|
||||
{
|
||||
if (!bo)
|
||||
return true;
|
||||
|
|
@ -81,11 +86,96 @@ v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo)
|
|||
if (ret != 0)
|
||||
fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
|
||||
|
||||
device->bo_count--;
|
||||
device->bo_size -= bo->size;
|
||||
vk_free(&device->alloc, bo);
|
||||
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
static void
|
||||
bo_cache_free_all(struct v3dv_device *device,
|
||||
bool with_lock)
|
||||
{
|
||||
struct v3dv_bo_cache *cache = &device->bo_cache;
|
||||
|
||||
if (with_lock)
|
||||
mtx_lock(&cache->lock);
|
||||
list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
|
||||
time_list) {
|
||||
bo_remove_from_cache(cache, bo);
|
||||
bo_free(device, bo);
|
||||
}
|
||||
if (with_lock)
|
||||
mtx_unlock(&cache->lock);
|
||||
|
||||
}
|
||||
|
||||
struct v3dv_bo *
|
||||
v3dv_bo_alloc(struct v3dv_device *device,
|
||||
uint32_t size,
|
||||
const char *name,
|
||||
bool private)
|
||||
{
|
||||
struct v3dv_bo *bo;
|
||||
|
||||
const uint32_t page_align = 4096; /* Always allocate full pages */
|
||||
size = align(size, page_align);
|
||||
|
||||
if (private) {
|
||||
bo = bo_from_cache(device, size, name);
|
||||
if (bo)
|
||||
return bo;
|
||||
}
|
||||
|
||||
bo = vk_alloc(&device->alloc, sizeof(struct v3dv_bo), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
|
||||
if (!bo) {
|
||||
fprintf(stderr, "Failed to allocate host memory for BO\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
retry:
|
||||
;
|
||||
|
||||
bool cleared_and_retried = false;
|
||||
struct drm_v3d_create_bo create = {
|
||||
.size = size
|
||||
};
|
||||
|
||||
int ret = v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_CREATE_BO, &create);
|
||||
if (ret != 0) {
|
||||
if (!list_is_empty(&device->bo_cache.time_list) &&
|
||||
!cleared_and_retried) {
|
||||
cleared_and_retried = true;
|
||||
bo_cache_free_all(device, true);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
vk_free(&device->alloc, bo);
|
||||
fprintf(stderr, "Failed to allocate device memory for BO\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
assert(create.offset % page_align == 0);
|
||||
assert((create.offset & 0xffffffff) == create.offset);
|
||||
|
||||
bo->handle = create.handle;
|
||||
bo->size = size;
|
||||
bo->offset = create.offset;
|
||||
bo->map = NULL;
|
||||
bo->map_size = 0;
|
||||
bo->name = name;
|
||||
bo->private = private;
|
||||
list_inithead(&bo->list_link);
|
||||
|
||||
device->bo_count++;
|
||||
device->bo_size += bo->size;
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
bool
|
||||
v3dv_bo_map_unsynchronized(struct v3dv_device *device,
|
||||
struct v3dv_bo *bo,
|
||||
|
|
@ -93,6 +183,9 @@ v3dv_bo_map_unsynchronized(struct v3dv_device *device,
|
|||
{
|
||||
assert(bo != NULL && size <= bo->size);
|
||||
|
||||
if (bo->map)
|
||||
return bo->map;
|
||||
|
||||
struct drm_v3d_mmap_bo map;
|
||||
memset(&map, 0, sizeof(map));
|
||||
map.handle = bo->handle;
|
||||
|
|
@ -158,3 +251,121 @@ v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
|
|||
bo->map_size = 0;
|
||||
}
|
||||
|
||||
static boolean
|
||||
reallocate_size_list(struct v3dv_bo_cache *cache,
|
||||
struct v3dv_device *device,
|
||||
uint32_t size)
|
||||
{
|
||||
struct list_head *new_list =
|
||||
vk_alloc(&device->alloc, sizeof(struct list_head) * size, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
|
||||
if (!new_list) {
|
||||
fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
|
||||
return false;
|
||||
}
|
||||
struct list_head *old_list = cache->size_list;
|
||||
|
||||
/* Move old list contents over (since the array has moved, and
|
||||
* therefore the pointers to the list heads have to change).
|
||||
*/
|
||||
for (int i = 0; i < cache->size_list_size; i++) {
|
||||
struct list_head *old_head = &cache->size_list[i];
|
||||
if (list_is_empty(old_head)) {
|
||||
list_inithead(&new_list[i]);
|
||||
} else {
|
||||
new_list[i].next = old_head->next;
|
||||
new_list[i].prev = old_head->prev;
|
||||
new_list[i].next->prev = &new_list[i];
|
||||
new_list[i].prev->next = &new_list[i];
|
||||
}
|
||||
}
|
||||
for (int i = cache->size_list_size; i < size; i++)
|
||||
list_inithead(&new_list[i]);
|
||||
|
||||
cache->size_list = new_list;
|
||||
cache->size_list_size = size;
|
||||
vk_free(&device->alloc, old_list);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
v3dv_bo_cache_init(struct v3dv_device *device)
|
||||
{
|
||||
device->bo_size = 0;
|
||||
device->bo_count = 0;
|
||||
list_inithead(&device->bo_cache.time_list);
|
||||
/* FIXME: perhaps set a initial size for the size-list, to avoid run-time
|
||||
* reallocations
|
||||
*/
|
||||
device->bo_cache.size_list_size = 0;
|
||||
}
|
||||
|
||||
void
|
||||
v3dv_bo_cache_destroy(struct v3dv_device *device)
|
||||
{
|
||||
bo_cache_free_all(device, true);
|
||||
vk_free(&device->alloc, device->bo_cache.size_list);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
free_stale_bos(struct v3dv_device *device,
|
||||
time_t time)
|
||||
{
|
||||
struct v3dv_bo_cache *cache = &device->bo_cache;
|
||||
|
||||
list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
|
||||
time_list) {
|
||||
/* If it's more than a second old, free it. */
|
||||
if (time - bo->free_time > 2) {
|
||||
bo_remove_from_cache(cache, bo);
|
||||
bo_free(device, bo);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
v3dv_bo_free(struct v3dv_device *device,
|
||||
struct v3dv_bo *bo)
|
||||
{
|
||||
if (!bo)
|
||||
return true;
|
||||
|
||||
struct timespec time;
|
||||
struct v3dv_bo_cache *cache = &device->bo_cache;
|
||||
uint32_t page_index = bo->size / 4096 - 1;
|
||||
|
||||
if (!bo->private)
|
||||
return bo_free(device, bo);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &time);
|
||||
mtx_lock(&cache->lock);
|
||||
|
||||
if (cache->size_list_size <= page_index) {
|
||||
if (!reallocate_size_list(cache, device, page_index + 1)) {
|
||||
bool outcome = bo_free(device, bo);
|
||||
/* If the reallocation failed, it usually means that we are out of
|
||||
* memory, so we also free all the bo cache. We need to call it to
|
||||
* not use the cache lock, as we are already under it.
|
||||
*/
|
||||
bo_cache_free_all(device, false);
|
||||
mtx_unlock(&cache->lock);
|
||||
return outcome;
|
||||
}
|
||||
}
|
||||
|
||||
bo->free_time = time.tv_sec;
|
||||
list_addtail(&bo->size_list, &cache->size_list[page_index]);
|
||||
list_addtail(&bo->time_list, &cache->time_list);
|
||||
bo->name = NULL;
|
||||
|
||||
free_stale_bos(device, time.tv_sec);
|
||||
|
||||
mtx_unlock(&cache->lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,9 +37,22 @@ struct v3dv_bo {
|
|||
void *map;
|
||||
|
||||
const char *name;
|
||||
|
||||
/** Entry in the linked list of buffers freed, by age. */
|
||||
struct list_head time_list;
|
||||
/** Entry in the per-page-count linked list of buffers freed (by age). */
|
||||
struct list_head size_list;
|
||||
/** Approximate second when the bo was freed. */
|
||||
time_t free_time;
|
||||
|
||||
/**
|
||||
* Whether only our process has a reference to the BO (meaning that
|
||||
* it's safe to reuse it in the BO cache).
|
||||
*/
|
||||
bool private;
|
||||
};
|
||||
|
||||
struct v3dv_bo *v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name);
|
||||
struct v3dv_bo *v3dv_bo_alloc(struct v3dv_device *device, uint32_t size, const char *name, bool private);
|
||||
|
||||
bool v3dv_bo_free(struct v3dv_device *device, struct v3dv_bo *bo);
|
||||
|
||||
|
|
@ -51,4 +64,7 @@ bool v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size);
|
|||
|
||||
void v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo);
|
||||
|
||||
void v3dv_bo_cache_init(struct v3dv_device *device);
|
||||
void v3dv_bo_cache_destroy(struct v3dv_device *device);
|
||||
|
||||
#endif /* V3DV_BO_H */
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ v3dv_cl_destroy(struct v3dv_cl *cl)
|
|||
static bool
|
||||
cl_alloc_bo(struct v3dv_cl *cl, uint32_t space, bool use_branch)
|
||||
{
|
||||
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL");
|
||||
struct v3dv_bo *bo = v3dv_bo_alloc(cl->job->device, space, "CL", true);
|
||||
if (!bo) {
|
||||
fprintf(stderr, "failed to allocate memory for command list\n");
|
||||
v3dv_flag_oom(NULL, cl->job);
|
||||
|
|
|
|||
|
|
@ -519,7 +519,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
|
|||
tile_alloc_size += 512 * 1024;
|
||||
|
||||
job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
|
||||
"tile_alloc");
|
||||
"tile_alloc", true);
|
||||
if (!job->tile_alloc) {
|
||||
v3dv_flag_oom(NULL, job);
|
||||
return;
|
||||
|
|
@ -532,7 +532,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
|
|||
tiling->draw_tiles_x *
|
||||
tiling->draw_tiles_y *
|
||||
tsda_per_tile_size;
|
||||
job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA");
|
||||
job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true);
|
||||
if (!job->tile_state) {
|
||||
v3dv_flag_oom(NULL, job);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -413,7 +413,7 @@ v3dv_CreateDescriptorPool(VkDevice _device,
|
|||
pool->max_entry_count = pCreateInfo->maxSets;
|
||||
|
||||
if (bo_size > 0) {
|
||||
pool->bo = v3dv_bo_alloc(device, bo_size, "descriptor pool bo");
|
||||
pool->bo = v3dv_bo_alloc(device, bo_size, "descriptor pool bo", true);
|
||||
if (!pool->bo)
|
||||
goto out_of_device_memory;
|
||||
|
||||
|
|
|
|||
|
|
@ -1286,6 +1286,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
|
||||
init_device_dispatch(device);
|
||||
init_device_meta(device);
|
||||
v3dv_bo_cache_init(device);
|
||||
|
||||
*pDevice = v3dv_device_to_handle(device);
|
||||
|
||||
|
|
@ -1308,6 +1309,7 @@ v3dv_DestroyDevice(VkDevice _device,
|
|||
pthread_mutex_destroy(&device->mutex);
|
||||
drmSyncobjDestroy(device->render_fd, device->last_job_sync);
|
||||
destroy_device_meta(device);
|
||||
v3dv_bo_cache_destroy(device);
|
||||
|
||||
vk_free2(&default_alloc, pAllocator, device);
|
||||
}
|
||||
|
|
@ -1362,7 +1364,7 @@ device_alloc(struct v3dv_device *device,
|
|||
{
|
||||
/* Our kernel interface is 32-bit */
|
||||
assert((size & 0xffffffff) == size);
|
||||
mem->bo = v3dv_bo_alloc(device, size, "device_alloc");
|
||||
mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false);
|
||||
if (!mem->bo)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
return VK_SUCCESS;
|
||||
|
|
|
|||
|
|
@ -1787,7 +1787,7 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
|
|||
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
|
||||
|
||||
struct v3dv_bo *src_bo =
|
||||
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer");
|
||||
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
|
||||
if (!src_bo) {
|
||||
fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -1160,7 +1160,7 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage,
|
|||
break;
|
||||
};
|
||||
|
||||
struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name);
|
||||
struct v3dv_bo *bo = v3dv_bo_alloc(device, size, name, true);
|
||||
if (!bo) {
|
||||
fprintf(stderr, "failed to allocate memory for shader\n");
|
||||
return false;
|
||||
|
|
@ -2262,7 +2262,8 @@ create_default_attribute_values(struct v3dv_pipeline *pipeline,
|
|||
|
||||
if (pipeline->default_attribute_values == NULL) {
|
||||
pipeline->default_attribute_values = v3dv_bo_alloc(pipeline->device, size,
|
||||
"default_vi_attributes");
|
||||
"default_vi_attributes",
|
||||
true);
|
||||
|
||||
if (!pipeline->default_attribute_values) {
|
||||
fprintf(stderr, "failed to allocate memory for the default "
|
||||
|
|
|
|||
|
|
@ -290,6 +290,19 @@ struct v3dv_device {
|
|||
struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
|
||||
} blit;
|
||||
} meta;
|
||||
|
||||
struct v3dv_bo_cache {
|
||||
/** List of freed struct v3dv_bo, by age. */
|
||||
struct list_head time_list;
|
||||
/** Lists of freed struct v3dv_bo, one per size (in pages), each by age. */
|
||||
struct list_head *size_list;
|
||||
uint32_t size_list_size;
|
||||
|
||||
mtx_t lock;
|
||||
} bo_cache;
|
||||
|
||||
uint32_t bo_size;
|
||||
uint32_t bo_count;
|
||||
};
|
||||
|
||||
struct v3dv_device_memory {
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ v3dv_CreateQueryPool(VkDevice _device,
|
|||
uint32_t i;
|
||||
for (i = 0; i < pool->query_count; i++) {
|
||||
pool->queries[i].maybe_available = false;
|
||||
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query");
|
||||
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
|
||||
if (!pool->queries[i].bo) {
|
||||
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail_alloc_bo;
|
||||
|
|
|
|||
|
|
@ -48,7 +48,8 @@ check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer)
|
|||
|
||||
if (cmd_buffer->push_constants_resource.bo == NULL) {
|
||||
cmd_buffer->push_constants_resource.bo =
|
||||
v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE, "push constants");
|
||||
v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE,
|
||||
"push constants", true);
|
||||
|
||||
if (!cmd_buffer->push_constants_resource.bo) {
|
||||
fprintf(stderr, "Failed to allocate memory for push constants\n");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue