mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-21 04:38:09 +02:00
The best way to tune this value is to test Vulkan applications. Current somewhat big value (512), was obtained by testing only vkQuake2. Additionally at that time the bo cache was the first performance oriented improvement we implemented. After more improvements were included, and retested with more applications, the conclusion is that we can reduce the value. More info on the issue that closes. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7090 Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18398>
517 lines
14 KiB
C
517 lines
14 KiB
C
/*
|
|
* Copyright © 2019 Raspberry Pi Ltd
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "v3dv_private.h"
|
|
|
|
#include <errno.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include "drm-uapi/v3d_drm.h"
|
|
#include "util/u_memory.h"
|
|
|
|
/* Default max size of the bo cache, in MB.
|
|
*
|
|
* This value comes from testing different Vulkan application. Greater values
|
|
* didn't get any further performance benefit. This looks somewhat small, but
|
|
* from testing those applications, the main consumer of the bo cache are
|
|
* the bos used for the CLs, that are usually small.
|
|
*/
|
|
#define DEFAULT_MAX_BO_CACHE_SIZE 64
|
|
|
|
/* Discarded to use a V3D_DEBUG for this, as it would mean adding a run-time
|
|
* check for most of the calls
|
|
*/
|
|
static const bool dump_stats = false;
|
|
|
|
static void
|
|
bo_dump_stats(struct v3dv_device *device)
|
|
{
|
|
struct v3dv_bo_cache *cache = &device->bo_cache;
|
|
|
|
fprintf(stderr, " BOs allocated: %d\n", device->bo_count);
|
|
fprintf(stderr, " BOs size: %dkb\n", device->bo_size / 1024);
|
|
fprintf(stderr, " BOs cached: %d\n", cache->cache_count);
|
|
fprintf(stderr, " BOs cached size: %dkb\n", cache->cache_size / 1024);
|
|
|
|
if (!list_is_empty(&cache->time_list)) {
|
|
struct v3dv_bo *first = list_first_entry(&cache->time_list,
|
|
struct v3dv_bo,
|
|
time_list);
|
|
struct v3dv_bo *last = list_last_entry(&cache->time_list,
|
|
struct v3dv_bo,
|
|
time_list);
|
|
|
|
fprintf(stderr, " oldest cache time: %ld\n",
|
|
(long)first->free_time);
|
|
fprintf(stderr, " newest cache time: %ld\n",
|
|
(long)last->free_time);
|
|
|
|
struct timespec time;
|
|
clock_gettime(CLOCK_MONOTONIC, &time);
|
|
fprintf(stderr, " now: %lld\n",
|
|
(long long)time.tv_sec);
|
|
}
|
|
|
|
if (cache->size_list_size) {
|
|
uint32_t empty_size_list = 0;
|
|
for (uint32_t i = 0; i < cache->size_list_size; i++) {
|
|
if (list_is_empty(&cache->size_list[i]))
|
|
empty_size_list++;
|
|
}
|
|
fprintf(stderr, " Empty size_list lists: %d\n", empty_size_list);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
|
|
{
|
|
list_del(&bo->time_list);
|
|
list_del(&bo->size_list);
|
|
|
|
cache->cache_count--;
|
|
cache->cache_size -= bo->size;
|
|
}
|
|
|
|
static struct v3dv_bo *
|
|
bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
|
|
{
|
|
struct v3dv_bo_cache *cache = &device->bo_cache;
|
|
uint32_t page_index = size / 4096 - 1;
|
|
|
|
if (cache->size_list_size <= page_index)
|
|
return NULL;
|
|
|
|
struct v3dv_bo *bo = NULL;
|
|
|
|
mtx_lock(&cache->lock);
|
|
if (!list_is_empty(&cache->size_list[page_index])) {
|
|
bo = list_first_entry(&cache->size_list[page_index],
|
|
struct v3dv_bo, size_list);
|
|
|
|
/* Check that the BO has gone idle. If not, then we want to
|
|
* allocate something new instead, since we assume that the
|
|
* user will proceed to CPU map it and fill it with stuff.
|
|
*/
|
|
if (!v3dv_bo_wait(device, bo, 0)) {
|
|
mtx_unlock(&cache->lock);
|
|
return NULL;
|
|
}
|
|
|
|
bo_remove_from_cache(cache, bo);
|
|
bo->name = name;
|
|
p_atomic_set(&bo->refcnt, 1);
|
|
}
|
|
mtx_unlock(&cache->lock);
|
|
return bo;
|
|
}
|
|
|
|
static bool
|
|
bo_free(struct v3dv_device *device,
|
|
struct v3dv_bo *bo)
|
|
{
|
|
if (!bo)
|
|
return true;
|
|
|
|
assert(p_atomic_read(&bo->refcnt) == 0);
|
|
assert(bo->map == NULL);
|
|
|
|
/* Our BO structs are stored in a sparse array in the physical device,
|
|
* so we don't want to free the BO pointer, instead we want to reset it
|
|
* to 0, to signal that array entry as being free.
|
|
*/
|
|
uint32_t handle = bo->handle;
|
|
memset(bo, 0, sizeof(*bo));
|
|
|
|
struct drm_gem_close c;
|
|
memset(&c, 0, sizeof(c));
|
|
c.handle = handle;
|
|
int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
|
|
if (ret != 0)
|
|
fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
|
|
|
|
device->bo_count--;
|
|
device->bo_size -= bo->size;
|
|
|
|
if (dump_stats) {
|
|
fprintf(stderr, "Freed %s%s%dkb:\n",
|
|
bo->name ? bo->name : "",
|
|
bo->name ? " " : "",
|
|
bo->size / 1024);
|
|
bo_dump_stats(device);
|
|
}
|
|
|
|
return ret == 0;
|
|
}
|
|
|
|
static void
|
|
bo_cache_free_all(struct v3dv_device *device,
|
|
bool with_lock)
|
|
{
|
|
struct v3dv_bo_cache *cache = &device->bo_cache;
|
|
|
|
if (with_lock)
|
|
mtx_lock(&cache->lock);
|
|
list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
|
|
time_list) {
|
|
bo_remove_from_cache(cache, bo);
|
|
bo_free(device, bo);
|
|
}
|
|
if (with_lock)
|
|
mtx_unlock(&cache->lock);
|
|
|
|
}
|
|
|
|
void
|
|
v3dv_bo_init(struct v3dv_bo *bo,
|
|
uint32_t handle,
|
|
uint32_t size,
|
|
uint32_t offset,
|
|
const char *name,
|
|
bool private)
|
|
{
|
|
p_atomic_set(&bo->refcnt, 1);
|
|
bo->handle = handle;
|
|
bo->handle_bit = 1ull << (handle % 64);
|
|
bo->size = size;
|
|
bo->offset = offset;
|
|
bo->map = NULL;
|
|
bo->map_size = 0;
|
|
bo->name = name;
|
|
bo->private = private;
|
|
bo->dumb_handle = -1;
|
|
list_inithead(&bo->list_link);
|
|
}
|
|
|
|
struct v3dv_bo *
|
|
v3dv_bo_alloc(struct v3dv_device *device,
|
|
uint32_t size,
|
|
const char *name,
|
|
bool private)
|
|
{
|
|
struct v3dv_bo *bo;
|
|
|
|
const uint32_t page_align = 4096; /* Always allocate full pages */
|
|
size = align(size, page_align);
|
|
|
|
if (private) {
|
|
bo = bo_from_cache(device, size, name);
|
|
if (bo) {
|
|
if (dump_stats) {
|
|
fprintf(stderr, "Allocated %s %dkb from cache:\n",
|
|
name, size / 1024);
|
|
bo_dump_stats(device);
|
|
}
|
|
return bo;
|
|
}
|
|
}
|
|
|
|
retry:
|
|
;
|
|
|
|
bool cleared_and_retried = false;
|
|
struct drm_v3d_create_bo create = {
|
|
.size = size
|
|
};
|
|
|
|
int ret = v3dv_ioctl(device->pdevice->render_fd,
|
|
DRM_IOCTL_V3D_CREATE_BO, &create);
|
|
if (ret != 0) {
|
|
if (!list_is_empty(&device->bo_cache.time_list) &&
|
|
!cleared_and_retried) {
|
|
cleared_and_retried = true;
|
|
bo_cache_free_all(device, true);
|
|
goto retry;
|
|
}
|
|
|
|
fprintf(stderr, "Failed to allocate device memory for BO\n");
|
|
return NULL;
|
|
}
|
|
|
|
assert(create.offset % page_align == 0);
|
|
assert((create.offset & 0xffffffff) == create.offset);
|
|
|
|
bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
|
|
assert(bo && bo->handle == 0);
|
|
|
|
v3dv_bo_init(bo, create.handle, size, create.offset, name, private);
|
|
|
|
device->bo_count++;
|
|
device->bo_size += bo->size;
|
|
if (dump_stats) {
|
|
fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
|
|
bo_dump_stats(device);
|
|
}
|
|
|
|
return bo;
|
|
}
|
|
|
|
bool
|
|
v3dv_bo_map_unsynchronized(struct v3dv_device *device,
|
|
struct v3dv_bo *bo,
|
|
uint32_t size)
|
|
{
|
|
assert(bo != NULL && size <= bo->size);
|
|
|
|
if (bo->map)
|
|
return bo->map;
|
|
|
|
struct drm_v3d_mmap_bo map;
|
|
memset(&map, 0, sizeof(map));
|
|
map.handle = bo->handle;
|
|
int ret = v3dv_ioctl(device->pdevice->render_fd,
|
|
DRM_IOCTL_V3D_MMAP_BO, &map);
|
|
if (ret != 0) {
|
|
fprintf(stderr, "map ioctl failure\n");
|
|
return false;
|
|
}
|
|
|
|
bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
|
|
device->pdevice->render_fd, map.offset);
|
|
if (bo->map == MAP_FAILED) {
|
|
fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
|
|
bo->handle, (long long)map.offset, (uint32_t)bo->size);
|
|
return false;
|
|
}
|
|
VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
|
|
|
|
bo->map_size = size;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
v3dv_bo_wait(struct v3dv_device *device,
|
|
struct v3dv_bo *bo,
|
|
uint64_t timeout_ns)
|
|
{
|
|
struct drm_v3d_wait_bo wait = {
|
|
.handle = bo->handle,
|
|
.timeout_ns = timeout_ns,
|
|
};
|
|
return v3dv_ioctl(device->pdevice->render_fd,
|
|
DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
|
|
}
|
|
|
|
bool
|
|
v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
|
|
{
|
|
assert(bo && size <= bo->size);
|
|
|
|
bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
|
|
if (!ok)
|
|
return false;
|
|
|
|
ok = v3dv_bo_wait(device, bo, PIPE_TIMEOUT_INFINITE);
|
|
if (!ok) {
|
|
fprintf(stderr, "memory wait for map failed\n");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
|
|
{
|
|
assert(bo && bo->map && bo->map_size > 0);
|
|
|
|
munmap(bo->map, bo->map_size);
|
|
VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
|
|
bo->map = NULL;
|
|
bo->map_size = 0;
|
|
}
|
|
|
|
static boolean
|
|
reallocate_size_list(struct v3dv_bo_cache *cache,
|
|
struct v3dv_device *device,
|
|
uint32_t size)
|
|
{
|
|
struct list_head *new_list =
|
|
vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
|
|
|
if (!new_list) {
|
|
fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
|
|
return false;
|
|
}
|
|
struct list_head *old_list = cache->size_list;
|
|
|
|
/* Move old list contents over (since the array has moved, and
|
|
* therefore the pointers to the list heads have to change).
|
|
*/
|
|
for (int i = 0; i < cache->size_list_size; i++) {
|
|
struct list_head *old_head = &cache->size_list[i];
|
|
if (list_is_empty(old_head)) {
|
|
list_inithead(&new_list[i]);
|
|
} else {
|
|
new_list[i].next = old_head->next;
|
|
new_list[i].prev = old_head->prev;
|
|
new_list[i].next->prev = &new_list[i];
|
|
new_list[i].prev->next = &new_list[i];
|
|
}
|
|
}
|
|
for (int i = cache->size_list_size; i < size; i++)
|
|
list_inithead(&new_list[i]);
|
|
|
|
cache->size_list = new_list;
|
|
cache->size_list_size = size;
|
|
vk_free(&device->vk.alloc, old_list);
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
v3dv_bo_cache_init(struct v3dv_device *device)
|
|
{
|
|
device->bo_size = 0;
|
|
device->bo_count = 0;
|
|
list_inithead(&device->bo_cache.time_list);
|
|
/* FIXME: perhaps set a initial size for the size-list, to avoid run-time
|
|
* reallocations
|
|
*/
|
|
device->bo_cache.size_list_size = 0;
|
|
|
|
const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
|
|
if (max_cache_size_str == NULL)
|
|
device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
|
|
else
|
|
device->bo_cache.max_cache_size = atoll(max_cache_size_str);
|
|
|
|
if (dump_stats) {
|
|
fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
|
|
}
|
|
|
|
device->bo_cache.max_cache_size *= 1024 * 1024;
|
|
device->bo_cache.cache_count = 0;
|
|
device->bo_cache.cache_size = 0;
|
|
}
|
|
|
|
void
|
|
v3dv_bo_cache_destroy(struct v3dv_device *device)
|
|
{
|
|
bo_cache_free_all(device, true);
|
|
vk_free(&device->vk.alloc, device->bo_cache.size_list);
|
|
|
|
if (dump_stats) {
|
|
fprintf(stderr, "BO stats after screen destroy:\n");
|
|
bo_dump_stats(device);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
free_stale_bos(struct v3dv_device *device,
|
|
time_t time)
|
|
{
|
|
struct v3dv_bo_cache *cache = &device->bo_cache;
|
|
bool freed_any = false;
|
|
|
|
list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
|
|
time_list) {
|
|
/* If it's more than a second old, free it. */
|
|
if (time - bo->free_time > 2) {
|
|
if (dump_stats && !freed_any) {
|
|
fprintf(stderr, "Freeing stale BOs:\n");
|
|
bo_dump_stats(device);
|
|
freed_any = true;
|
|
}
|
|
|
|
bo_remove_from_cache(cache, bo);
|
|
bo_free(device, bo);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (dump_stats && freed_any) {
|
|
fprintf(stderr, "Freed stale BOs:\n");
|
|
bo_dump_stats(device);
|
|
}
|
|
}
|
|
|
|
bool
|
|
v3dv_bo_free(struct v3dv_device *device,
|
|
struct v3dv_bo *bo)
|
|
{
|
|
if (!bo)
|
|
return true;
|
|
|
|
if (!p_atomic_dec_zero(&bo->refcnt))
|
|
return true;
|
|
|
|
if (bo->map)
|
|
v3dv_bo_unmap(device, bo);
|
|
|
|
struct timespec time;
|
|
struct v3dv_bo_cache *cache = &device->bo_cache;
|
|
uint32_t page_index = bo->size / 4096 - 1;
|
|
|
|
if (bo->private &&
|
|
bo->size > cache->max_cache_size - cache->cache_size) {
|
|
clock_gettime(CLOCK_MONOTONIC, &time);
|
|
mtx_lock(&cache->lock);
|
|
free_stale_bos(device, time.tv_sec);
|
|
mtx_unlock(&cache->lock);
|
|
}
|
|
|
|
if (!bo->private ||
|
|
bo->size > cache->max_cache_size - cache->cache_size) {
|
|
return bo_free(device, bo);
|
|
}
|
|
|
|
clock_gettime(CLOCK_MONOTONIC, &time);
|
|
mtx_lock(&cache->lock);
|
|
|
|
if (cache->size_list_size <= page_index) {
|
|
if (!reallocate_size_list(cache, device, page_index + 1)) {
|
|
bool outcome = bo_free(device, bo);
|
|
/* If the reallocation failed, it usually means that we are out of
|
|
* memory, so we also free all the bo cache. We need to call it to
|
|
* not use the cache lock, as we are already under it.
|
|
*/
|
|
bo_cache_free_all(device, false);
|
|
mtx_unlock(&cache->lock);
|
|
return outcome;
|
|
}
|
|
}
|
|
|
|
bo->free_time = time.tv_sec;
|
|
list_addtail(&bo->size_list, &cache->size_list[page_index]);
|
|
list_addtail(&bo->time_list, &cache->time_list);
|
|
|
|
cache->cache_count++;
|
|
cache->cache_size += bo->size;
|
|
|
|
if (dump_stats) {
|
|
fprintf(stderr, "Freed %s %dkb to cache:\n",
|
|
bo->name, bo->size / 1024);
|
|
bo_dump_stats(device);
|
|
}
|
|
bo->name = NULL;
|
|
|
|
free_stale_bos(device, time.tv_sec);
|
|
|
|
mtx_unlock(&cache->lock);
|
|
|
|
return true;
|
|
}
|