mesa/src/broadcom/vulkan/v3dv_bo.c
Alejandro Piñeiro 944b08e597 v3dv/bo: reduce DEFAULT_MAX_BO_CACHE_SIZE to 64
The best way to tune this value is to test Vulkan
applications. The current, somewhat large value (512) was obtained by
testing only vkQuake2. Additionally, at that time the bo cache was the
first performance-oriented improvement we implemented.

After more improvements were included and we retested with more
applications, the conclusion is that we can reduce the value. More
info in the issue that this commit closes.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7090

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18398>
2022-09-05 22:25:09 +00:00

517 lines
14 KiB
C

/*
* Copyright © 2019 Raspberry Pi Ltd
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "v3dv_private.h"
#include <errno.h>
#include <sys/mman.h>
#include "drm-uapi/v3d_drm.h"
#include "util/u_memory.h"
/* Default max size of the bo cache, in MB. Used by v3dv_bo_cache_init() when
 * the V3DV_MAX_BO_CACHE_SIZE environment variable is not set.
 *
 * This value comes from testing different Vulkan applications. Greater values
 * didn't provide any further performance benefit. This looks somewhat small,
 * but from testing those applications, the main consumers of the bo cache are
 * the bos used for the CLs, which are usually small.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 64
/* Compile-time switch: when true, the allocation/free paths in this file
 * print BO and BO-cache statistics to stderr.
 *
 * We decided against using a V3D_DEBUG option for this, as it would mean
 * adding a run-time check to most of the calls.
 */
static const bool dump_stats = false;
/* Prints a summary of the device BO accounting and of the BO cache to stderr:
 * allocated and cached counts/sizes, the free times found at both ends of the
 * cache time list (plus the current monotonic time) and the number of size
 * buckets that are currently empty.
 */
static void
bo_dump_stats(struct v3dv_device *device)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   fprintf(stderr, " BOs allocated: %d\n", device->bo_count);
   fprintf(stderr, " BOs size: %dkb\n", device->bo_size / 1024);
   fprintf(stderr, " BOs cached: %d\n", cache->cache_count);
   fprintf(stderr, " BOs cached size: %dkb\n", cache->cache_size / 1024);

   if (!list_is_empty(&cache->time_list)) {
      /* BOs are appended at the tail when cached, so the head is the oldest. */
      struct v3dv_bo *oldest =
         list_first_entry(&cache->time_list, struct v3dv_bo, time_list);
      struct v3dv_bo *newest =
         list_last_entry(&cache->time_list, struct v3dv_bo, time_list);

      fprintf(stderr, " oldest cache time: %ld\n", (long)oldest->free_time);
      fprintf(stderr, " newest cache time: %ld\n", (long)newest->free_time);

      struct timespec now;
      clock_gettime(CLOCK_MONOTONIC, &now);
      fprintf(stderr, " now: %lld\n", (long long)now.tv_sec);
   }

   /* Without any size bucket there is nothing else to report. */
   if (cache->size_list_size == 0)
      return;

   uint32_t empty_buckets = 0;
   for (uint32_t bucket = 0; bucket < cache->size_list_size; bucket++) {
      if (list_is_empty(&cache->size_list[bucket]))
         empty_buckets++;
   }
   fprintf(stderr, " Empty size_list lists: %d\n", empty_buckets);
}
/* Takes a BO out of the cache: unlinks it from both the per-size and the
 * per-time lists and updates the cache counters accordingly.
 *
 * This function does not take the cache lock itself.
 */
static void
bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
{
   /* Accounting first; it only reads the BO size. */
   cache->cache_size -= bo->size;
   cache->cache_count--;

   /* The two lists are independent of each other. */
   list_del(&bo->size_list);
   list_del(&bo->time_list);
}
/* Tries to reuse a cached BO of exactly 'size' bytes.
 *
 * The cache keeps one list per size, indexed by the number of 4096-byte pages
 * minus one. Only the first BO of the matching list is considered, and only
 * if a zero-timeout wait reports it as idle. On success the BO is removed
 * from the cache, renamed to 'name' and handed out with a refcount of 1.
 *
 * Returns NULL when there is no suitable BO.
 */
static struct v3dv_bo *
bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   const uint32_t bucket = size / 4096 - 1;

   /* No list exists (yet) for BOs of this size. */
   if (bucket >= cache->size_list_size)
      return NULL;

   struct v3dv_bo *result = NULL;

   mtx_lock(&cache->lock);

   struct list_head *head = &cache->size_list[bucket];
   if (!list_is_empty(head)) {
      struct v3dv_bo *candidate =
         list_first_entry(head, struct v3dv_bo, size_list);

      /* Only hand out the BO if it has gone idle. Otherwise we prefer to
       * allocate something new, since we assume that the user will proceed
       * to CPU map it and fill it with stuff.
       */
      if (v3dv_bo_wait(device, candidate, 0)) {
         bo_remove_from_cache(cache, candidate);
         candidate->name = name;
         p_atomic_set(&candidate->refcnt, 1);
         result = candidate;
      }
   }

   mtx_unlock(&cache->lock);

   return result;
}
/* Releases a BO: clears its slot in the BO array, closes the GEM handle and
 * updates the device BO accounting.
 *
 * The BO must have no references left and must not be CPU mapped.
 *
 * Returns true if 'bo' is NULL or the GEM close ioctl succeeded, false
 * otherwise.
 */
static bool
bo_free(struct v3dv_device *device,
        struct v3dv_bo *bo)
{
   if (!bo)
      return true;

   assert(p_atomic_read(&bo->refcnt) == 0);
   assert(bo->map == NULL);

   /* The struct is wiped right below, so keep a copy of everything we still
    * need afterwards. Reading these fields after the memset would always
    * yield 0/NULL, which would leave device->bo_size never decremented and
    * make the messages below useless.
    */
   const uint32_t handle = bo->handle;
   const uint32_t size = bo->size;
   const char *name = bo->name;

   /* Our BO structs are stored in a sparse array in the physical device,
    * so we don't want to free the BO pointer, instead we want to reset it
    * to 0, to signal that array entry as being free.
    */
   memset(bo, 0, sizeof(*bo));

   struct drm_gem_close c;
   memset(&c, 0, sizeof(c));
   c.handle = handle;
   int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
   if (ret != 0)
      fprintf(stderr, "close object %d: %s\n", handle, strerror(errno));

   device->bo_count--;
   device->bo_size -= size;

   if (dump_stats) {
      fprintf(stderr, "Freed %s%s%dkb:\n",
              name ? name : "",
              name ? " " : "",
              size / 1024);
      bo_dump_stats(device);
   }

   return ret == 0;
}
/* Evicts and releases every BO currently held in the cache.
 *
 * 'with_lock' selects whether this function takes the cache lock itself;
 * pass false when the caller already holds it.
 */
static void
bo_cache_free_all(struct v3dv_device *device,
                  bool with_lock)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   if (with_lock)
      mtx_lock(&cache->lock);

   /* The time list links every cached BO, so walking it empties the cache.
    * The _safe iterator is needed because entries are unlinked as we go.
    */
   list_for_each_entry_safe(struct v3dv_bo, cached, &cache->time_list,
                            time_list) {
      bo_remove_from_cache(cache, cached);
      bo_free(device, cached);
   }

   if (with_lock)
      mtx_unlock(&cache->lock);
}
/* Fills in a BO struct for a freshly obtained handle.
 *
 * The BO starts with a single reference, no CPU mapping and no dumb handle
 * (-1).
 */
void
v3dv_bo_init(struct v3dv_bo *bo,
             uint32_t handle,
             uint32_t size,
             uint32_t offset,
             const char *name,
             bool private)
{
   p_atomic_set(&bo->refcnt, 1);

   /* Handles */
   bo->handle = handle;
   /* One bit out of 64, selected by the handle value. */
   bo->handle_bit = 1ull << (handle % 64);
   bo->dumb_handle = -1;

   /* Placement and description */
   bo->offset = offset;
   bo->size = size;
   bo->private = private;
   bo->name = name;

   /* Not CPU mapped yet */
   bo->map_size = 0;
   bo->map = NULL;

   list_inithead(&bo->list_link);
}
struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device *device,
uint32_t size,
const char *name,
bool private)
{
struct v3dv_bo *bo;
const uint32_t page_align = 4096; /* Always allocate full pages */
size = align(size, page_align);
if (private) {
bo = bo_from_cache(device, size, name);
if (bo) {
if (dump_stats) {
fprintf(stderr, "Allocated %s %dkb from cache:\n",
name, size / 1024);
bo_dump_stats(device);
}
return bo;
}
}
retry:
;
bool cleared_and_retried = false;
struct drm_v3d_create_bo create = {
.size = size
};
int ret = v3dv_ioctl(device->pdevice->render_fd,
DRM_IOCTL_V3D_CREATE_BO, &create);
if (ret != 0) {
if (!list_is_empty(&device->bo_cache.time_list) &&
!cleared_and_retried) {
cleared_and_retried = true;
bo_cache_free_all(device, true);
goto retry;
}
fprintf(stderr, "Failed to allocate device memory for BO\n");
return NULL;
}
assert(create.offset % page_align == 0);
assert((create.offset & 0xffffffff) == create.offset);
bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
assert(bo && bo->handle == 0);
v3dv_bo_init(bo, create.handle, size, create.offset, name, private);
device->bo_count++;
device->bo_size += bo->size;
if (dump_stats) {
fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
bo_dump_stats(device);
}
return bo;
}
/* Maps the first 'size' bytes of a BO into the CPU address space without
 * waiting for the BO to become idle.
 *
 * If the BO is already mapped this returns true and keeps the existing
 * mapping. On failure bo->map is left as NULL, so the BO is never seen as
 * mapped by later map/unmap/free calls.
 *
 * Returns true on success, false if the map ioctl or the mmap failed.
 */
bool
v3dv_bo_map_unsynchronized(struct v3dv_device *device,
                           struct v3dv_bo *bo,
                           uint32_t size)
{
   assert(bo != NULL && size <= bo->size);

   if (bo->map)
      return true;

   struct drm_v3d_mmap_bo map;
   memset(&map, 0, sizeof(map));
   map.handle = bo->handle;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_MMAP_BO, &map);
   if (ret != 0) {
      fprintf(stderr, "map ioctl failure\n");
      return false;
   }

   /* Only publish the pointer once we know the mapping is valid:
    * MAP_FAILED is not NULL, so storing it in bo->map would make the BO look
    * mapped.
    */
   void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    device->pdevice->render_fd, map.offset);
   if (ptr == MAP_FAILED) {
      fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
              bo->handle, (long long)map.offset, (uint32_t)bo->size);
      return false;
   }

   bo->map = ptr;
   VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));

   bo->map_size = size;

   return true;
}
/* Issues a wait-BO ioctl for 'bo' with the given timeout in nanoseconds.
 *
 * Returns true if the ioctl succeeded, false otherwise. bo_from_cache() uses
 * a timeout of 0 as a non-blocking idle check.
 */
bool
v3dv_bo_wait(struct v3dv_device *device,
             struct v3dv_bo *bo,
             uint64_t timeout_ns)
{
   struct drm_v3d_wait_bo wait = {
      .handle = bo->handle,
      .timeout_ns = timeout_ns,
   };

   const int ret = v3dv_ioctl(device->pdevice->render_fd,
                              DRM_IOCTL_V3D_WAIT_BO, &wait);
   return ret == 0;
}
/* Maps the first 'size' bytes of a BO for CPU access and then waits, with an
 * infinite timeout, for the BO to become idle.
 *
 * Returns false if either the mapping or the wait failed.
 */
bool
v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
{
   assert(bo && size <= bo->size);

   if (!v3dv_bo_map_unsynchronized(device, bo, size))
      return false;

   if (v3dv_bo_wait(device, bo, PIPE_TIMEOUT_INFINITE))
      return true;

   fprintf(stderr, "memory wait for map failed\n");
   return false;
}
/* Removes the CPU mapping of a BO and clears the mapping state stored in it.
 *
 * The BO must currently be mapped.
 */
void
v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
{
   assert(bo && bo->map && bo->map_size > 0);

   /* Unmap exactly the range recorded when the mapping was created. */
   munmap(bo->map, bo->map_size);
   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));

   /* Back to the "not mapped" state. */
   bo->map_size = 0;
   bo->map = NULL;
}
static boolean
reallocate_size_list(struct v3dv_bo_cache *cache,
struct v3dv_device *device,
uint32_t size)
{
struct list_head *new_list =
vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!new_list) {
fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
return false;
}
struct list_head *old_list = cache->size_list;
/* Move old list contents over (since the array has moved, and
* therefore the pointers to the list heads have to change).
*/
for (int i = 0; i < cache->size_list_size; i++) {
struct list_head *old_head = &cache->size_list[i];
if (list_is_empty(old_head)) {
list_inithead(&new_list[i]);
} else {
new_list[i].next = old_head->next;
new_list[i].prev = old_head->prev;
new_list[i].next->prev = &new_list[i];
new_list[i].prev->next = &new_list[i];
}
}
for (int i = cache->size_list_size; i < size; i++)
list_inithead(&new_list[i]);
cache->size_list = new_list;
cache->size_list_size = size;
vk_free(&device->vk.alloc, old_list);
return true;
}
void
v3dv_bo_cache_init(struct v3dv_device *device)
{
device->bo_size = 0;
device->bo_count = 0;
list_inithead(&device->bo_cache.time_list);
/* FIXME: perhaps set a initial size for the size-list, to avoid run-time
* reallocations
*/
device->bo_cache.size_list_size = 0;
const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
if (max_cache_size_str == NULL)
device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
else
device->bo_cache.max_cache_size = atoll(max_cache_size_str);
if (dump_stats) {
fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
}
device->bo_cache.max_cache_size *= 1024 * 1024;
device->bo_cache.cache_count = 0;
device->bo_cache.cache_size = 0;
}
void
v3dv_bo_cache_destroy(struct v3dv_device *device)
{
bo_cache_free_all(device, true);
vk_free(&device->vk.alloc, device->bo_cache.size_list);
if (dump_stats) {
fprintf(stderr, "BO stats after screen destroy:\n");
bo_dump_stats(device);
}
}
/* Releases the cached BOs that have been sitting in the cache for too long.
 *
 * 'time' is the current time in seconds. A BO is stale when it was cached
 * more than 2 seconds before 'time'.
 *
 * This function does not take the cache lock; the callers in this file hold
 * it.
 */
static void
free_stale_bos(struct v3dv_device *device,
               time_t time)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   bool announced = false;

   list_for_each_entry_safe(struct v3dv_bo, cached, &cache->time_list,
                            time_list) {
      /* BOs are appended at the tail when cached, so the first one that is
       * still fresh ends the scan.
       */
      if (time - cached->free_time <= 2)
         break;

      if (dump_stats && !announced) {
         fprintf(stderr, "Freeing stale BOs:\n");
         bo_dump_stats(device);
         announced = true;
      }

      bo_remove_from_cache(cache, cached);
      bo_free(device, cached);
   }

   if (dump_stats && announced) {
      fprintf(stderr, "Freed stale BOs:\n");
      bo_dump_stats(device);
   }
}
/* Drops one reference to a BO.
 *
 * When the last reference goes away the BO is unmapped (if it was mapped) and
 * then either parked in the BO cache, for private BOs that fit in the
 * remaining cache budget, or released right away.
 *
 * Returns true unless the BO had to be released and that release failed.
 */
bool
v3dv_bo_free(struct v3dv_device *device,
             struct v3dv_bo *bo)
{
   /* Nothing to do for NULL, or while other references remain. */
   if (bo == NULL || !p_atomic_dec_zero(&bo->refcnt))
      return true;

   if (bo->map != NULL)
      v3dv_bo_unmap(device, bo);

   struct v3dv_bo_cache *cache = &device->bo_cache;
   /* Index of the size list: number of 4096-byte pages minus one. */
   const uint32_t bucket = bo->size / 4096 - 1;
   struct timespec now;

   /* A private BO that does not fit in the budget: try to make room by
    * dropping stale entries before deciding what to do with it.
    */
   if (bo->private &&
       bo->size > cache->max_cache_size - cache->cache_size) {
      clock_gettime(CLOCK_MONOTONIC, &now);
      mtx_lock(&cache->lock);
      free_stale_bos(device, now.tv_sec);
      mtx_unlock(&cache->lock);
   }

   /* Only private BOs that fit in the remaining budget get cached. */
   const bool fits_in_cache =
      bo->private &&
      bo->size <= cache->max_cache_size - cache->cache_size;
   if (!fits_in_cache)
      return bo_free(device, bo);

   clock_gettime(CLOCK_MONOTONIC, &now);
   mtx_lock(&cache->lock);

   if (bucket >= cache->size_list_size &&
       !reallocate_size_list(cache, device, bucket + 1)) {
      const bool released = bo_free(device, bo);
      /* If the reallocation failed, it usually means that we are out of
       * memory, so we also free the whole bo cache. We ask for the variant
       * that does not take the cache lock, as we are already holding it.
       */
      bo_cache_free_all(device, false);
      mtx_unlock(&cache->lock);
      return released;
   }

   /* Park the BO at the tail of both its size list and the time list. */
   bo->free_time = now.tv_sec;
   list_addtail(&bo->size_list, &cache->size_list[bucket]);
   list_addtail(&bo->time_list, &cache->time_list);
   cache->cache_count++;
   cache->cache_size += bo->size;

   if (dump_stats) {
      fprintf(stderr, "Freed %s %dkb to cache:\n",
              bo->name, bo->size / 1024);
      bo_dump_stats(device);
   }
   bo->name = NULL;

   /* Take the opportunity to evict whatever has become stale meanwhile. */
   free_stale_bos(device, now.tv_sec);

   mtx_unlock(&cache->lock);

   return true;
}