pvr: introduce suballocator for internal allocations

Add implementation for a simple sub-allocator in order to save
memory when doing internal driver allocations.

Signed-off-by: Luigi Santivetti <luigi.santivetti@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22940>
This commit is contained in:
Luigi Santivetti 2023-05-10 08:29:15 +01:00
parent 882fd3c522
commit 06c6cfc55b
2 changed files with 270 additions and 1 deletions

View file

@ -1,6 +1,9 @@
/*
* Copyright © 2022 Imagination Technologies Ltd.
*
* based in part on tu driver which is:
* Copyright © 2022 Google LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
@ -33,6 +36,7 @@
# include <memcheck.h>
#endif
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_debug.h"
#include "pvr_dump.h"
@ -353,6 +357,8 @@ VkResult pvr_bo_alloc(struct pvr_device *device,
if (!pvr_bo)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
pvr_bo->ref_count = 1;
result = device->ws->ops->buffer_create(device->ws,
size,
alignment,
@ -475,6 +481,9 @@ void pvr_bo_free(struct pvr_device *device, struct pvr_bo *pvr_bo)
if (!pvr_bo)
return;
if (!p_atomic_dec_zero(&pvr_bo->ref_count))
return;
#if defined(HAVE_VALGRIND)
vk_free(&device->vk.alloc, pvr_bo->bo->vbits);
#endif /* defined(HAVE_VALGRIND) */
@ -492,6 +501,211 @@ void pvr_bo_free(struct pvr_device *device, struct pvr_bo *pvr_bo)
pvr_bo_free_bo(device, pvr_bo);
}
/**
 * \brief Interface to initialize a pvr_suballocator.
 *
 * Puts the allocator into a known-empty state (no current buffer, no cached
 * buffer, next offset at zero) and creates its mutex.
 *
 * \param[in] allocator Sub-allocator to initialize.
 * \param[in] heap Heap to sub-allocate device virtual address from.
 * \param[in] device Logical device pointer.
 * \param[in] default_size Minimum size used for pvr bo(s).
 *
 * \sa #pvr_bo_suballocator_fini()
 */
void pvr_bo_suballocator_init(struct pvr_suballocator *allocator,
                              struct pvr_winsys_heap *heap,
                              struct pvr_device *device,
                              uint32_t default_size)
{
   allocator->device = device;
   allocator->heap = heap;
   allocator->default_size = default_size;

   /* No backing buffers exist yet; the first pvr_bo_suballoc() creates one. */
   allocator->bo = NULL;
   allocator->bo_cached = NULL;
   allocator->next_offset = 0;

   simple_mtx_init(&allocator->mtx, mtx_plain);
}
/**
 * \brief Interface to destroy a pvr_suballocator.
 *
 * Releases the allocator's references on its current and cached buffers
 * (pvr_bo_free() tolerates NULL, so either may be absent) and destroys the
 * mutex.
 *
 * \param[in] allocator Sub-allocator to clean-up.
 *
 * \sa #pvr_bo_suballocator_init()
 */
void pvr_bo_suballocator_fini(struct pvr_suballocator *allocator)
{
   struct pvr_device *const device = allocator->device;

   pvr_bo_free(device, allocator->bo);
   pvr_bo_free(device, allocator->bo_cached);

   simple_mtx_destroy(&allocator->mtx);
}
/* Atomically take an extra reference on \p pvr_bo and return it, so a new
 * sub-allocation can share the backing buffer.
 */
static inline struct pvr_bo *pvr_bo_get_ref(struct pvr_bo *pvr_bo)
{
   p_atomic_inc(&pvr_bo->ref_count);

   return pvr_bo;
}
/**
 * \brief Interface to sub-allocate buffer objects.
 *
 * Sub-allocations share a larger backing pvr_bo; each one holds its own
 * reference on that buffer (see pvr_bo_get_ref()), so the buffer stays alive
 * until every sub-allocation made from it has been freed.
 *
 * \param[in] allocator Sub-allocator used to make a sub-allocation.
 * \param[in] size Size of buffer to sub-allocate.
 * \param[in] align Required alignment of the allocation. Must be
 *                  a power of two.
 * \param[in] zero_on_alloc Require memory for the sub-allocation to be 0.
 * \param[out] suballoc_bo_out On success points to the sub-allocated buffer
 *                             object.
 * \return VK_SUCCESS on success, or error code otherwise.
 *
 * \sa #pvr_bo_suballoc_free()
 */
VkResult pvr_bo_suballoc(struct pvr_suballocator *allocator,
                         uint32_t size,
                         uint32_t align,
                         bool zero_on_alloc,
                         struct pvr_suballoc_bo **const suballoc_bo_out)
{
   const struct pvr_device_info *dev_info =
      &allocator->device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   struct pvr_suballoc_bo *suballoc_bo;
   uint32_t alloc_size, aligned_size;
   VkResult result;

   suballoc_bo = vk_alloc(&allocator->device->vk.alloc,
                          sizeof(*suballoc_bo),
                          8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!suballoc_bo)
      return vk_error(allocator->device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* This cache line value is used for all type of allocations (i.e. USC, PDS
    * and general), so always align them to at least the size of the cache line.
    */
   align = MAX2(align, cache_line_size);
   assert(util_is_power_of_two_nonzero(align));
   aligned_size = ALIGN_POT(size, align);

   simple_mtx_lock(&allocator->mtx);

   /* Fast path: carve the request out of the current buffer if it fits. */
   if (allocator->bo) {
      uint32_t aligned_offset = ALIGN_POT(allocator->next_offset, align);

      if (aligned_offset + aligned_size <= allocator->bo->bo->size) {
         suballoc_bo->allocator = allocator;
         /* Each sub-allocation keeps its own reference on the backing bo. */
         suballoc_bo->bo = pvr_bo_get_ref(allocator->bo);
         suballoc_bo->dev_addr =
            PVR_DEV_ADDR_OFFSET(allocator->bo->vma->dev_addr, aligned_offset);
         suballoc_bo->offset = aligned_offset;
         suballoc_bo->size = aligned_size;
         allocator->next_offset = aligned_offset + aligned_size;

         if (zero_on_alloc)
            memset(pvr_bo_suballoc_get_map_addr(suballoc_bo), 0, aligned_size);

         *suballoc_bo_out = suballoc_bo;
         simple_mtx_unlock(&allocator->mtx);

         return VK_SUCCESS;
      } else {
         /* Not enough room left in the current buffer: drop the allocator's
          * reference and abandon the tail. Outstanding sub-allocations keep
          * the buffer alive via their own references.
          */
         pvr_bo_free(allocator->device, allocator->bo);
         allocator->bo = NULL;
      }
   }

   /* Size the replacement buffer so future requests can share it too. */
   alloc_size = MAX2(aligned_size, ALIGN_POT(allocator->default_size, align));

   /* Reuse the cached buffer if it is big enough, otherwise release it. */
   if (allocator->bo_cached) {
      struct pvr_winsys_bo *bo_cached = allocator->bo_cached->bo;

      if (alloc_size <= bo_cached->size)
         allocator->bo = allocator->bo_cached;
      else
         pvr_bo_free(allocator->device, allocator->bo_cached);

      allocator->bo_cached = NULL;
   }

   /* No usable cached buffer: allocate a fresh, CPU-mapped one. */
   if (!allocator->bo) {
      result = pvr_bo_alloc(allocator->device,
                            allocator->heap,
                            alloc_size,
                            align,
                            PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                            &allocator->bo);
      if (result != VK_SUCCESS) {
         vk_free(&allocator->device->vk.alloc, suballoc_bo);
         simple_mtx_unlock(&allocator->mtx);
         return result;
      }
   }

   /* First sub-allocation out of the (new or cached) buffer starts at 0. */
   suballoc_bo->allocator = allocator;
   suballoc_bo->bo = pvr_bo_get_ref(allocator->bo);
   suballoc_bo->dev_addr = allocator->bo->vma->dev_addr;
   suballoc_bo->offset = 0;
   suballoc_bo->size = aligned_size;
   allocator->next_offset = aligned_size;

   if (zero_on_alloc)
      memset(pvr_bo_suballoc_get_map_addr(suballoc_bo), 0, aligned_size);

   *suballoc_bo_out = suballoc_bo;
   simple_mtx_unlock(&allocator->mtx);

   return VK_SUCCESS;
}
/**
* \brief Interface to free a sub-allocated buffer object.
*
* \param[in] suballoc_bo Sub-allocated buffer object to free.
*
* \sa #pvr_bo_suballoc()
*/
void pvr_bo_suballoc_free(struct pvr_suballoc_bo *suballoc_bo)
{
if (!suballoc_bo)
return;
simple_mtx_lock(&suballoc_bo->allocator->mtx);
if (p_atomic_read(&suballoc_bo->bo->ref_count) == 1 &&
!suballoc_bo->allocator->bo_cached) {
suballoc_bo->allocator->bo_cached = suballoc_bo->bo;
} else {
pvr_bo_free(suballoc_bo->allocator->device, suballoc_bo->bo);
}
simple_mtx_unlock(&suballoc_bo->allocator->mtx);
vk_free(&suballoc_bo->allocator->device->vk.alloc, suballoc_bo);
}
/**
 * \brief Interface to retrieve sub-allocated memory offset from the host
 * virtual address space.
 *
 * The backing buffer is created with PVR_BO_ALLOC_FLAG_CPU_MAPPED by
 * pvr_bo_suballoc(), so its map pointer is always valid here.
 *
 * \param[in] suballoc_bo Sub-allocated buffer object pointer.
 *
 * \return Valid host virtual address on success.
 *
 * \sa #pvr_bo_suballoc()
 */
void *pvr_bo_suballoc_get_map_addr(const struct pvr_suballoc_bo *suballoc_bo)
{
   const struct pvr_bo *pvr_bo = suballoc_bo->bo;

   /* The whole sub-allocation, not merely its first byte, must lie within
    * the backing buffer; pvr_bo_suballoc() guarantees this when carving out
    * the range.
    */
   assert(suballoc_bo->offset + suballoc_bo->size <= pvr_bo->bo->size);

   return (uint8_t *)pvr_bo->bo->map + suballoc_bo->offset;
}
#if defined(HAVE_VALGRIND)
void *pvr_bo_cpu_map_unchanged(struct pvr_device *device, struct pvr_bo *pvr_bo)
{

View file

@ -1,6 +1,11 @@
/*
* Copyright © 2022 Imagination Technologies Ltd.
*
* based in part on tu driver which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
@ -32,6 +37,7 @@
#include "pvr_winsys.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
struct pvr_device;
struct pvr_dump_ctx;
@ -40,7 +46,7 @@ struct pvr_winsys_vma;
struct pvr_winsys_heap;
struct pvr_bo {
/* Since multiple components (csb, caching logic, etc) can make use of
/* Since multiple components (csb, caching logic, etc.) can make use of
* linking buffers in a list, we add 'link' in pvr_bo to avoid an extra
* level of structure inheritance. It's the responsibility of the buffer
* user to manage the list and remove the buffer from the list before
@ -50,6 +56,42 @@ struct pvr_bo {
struct pvr_winsys_bo *bo;
struct pvr_winsys_vma *vma;
uint32_t ref_count;
};
/**
 * \brief Simple sub-allocator state.
 *
 * Hands out slices of a shared pvr_bo to reduce the number of winsys buffer
 * allocations made for small internal driver allocations.
 */
struct pvr_suballocator {
   /* Pointer to the pvr_device this allocator is associated with */
   struct pvr_device *device;

   /* Pointer to one heap type (e.g. general, pds or usc) */
   struct pvr_winsys_heap *heap;

   /* Minimum size of the pvr_bo shared across multiple sub-allocations */
   uint32_t default_size;

   /* Mutex to protect access to all of the members below this point */
   simple_mtx_t mtx;

   /* Current buffer object where sub-allocations are made from */
   struct pvr_bo *bo;

   /* Previous buffer that can be used when a new buffer object is needed */
   struct pvr_bo *bo_cached;

   /* Track from where to start the next sub-allocation */
   uint32_t next_offset;
};
/**
 * \brief One sub-allocation carved out of a shared pvr_bo.
 *
 * Holds its own reference on the backing buffer; release it with
 * pvr_bo_suballoc_free().
 */
struct pvr_suballoc_bo {
   /* Since multiple components (command buffer, clear, descriptor sets,
    * pipeline, SPM, etc.) can make use of linking sub-allocated bo(s), we
    * add 'link' in pvr_suballoc_bo and avoid one extra level of structure
    * inheritance. It is users' responsibility to manage the linked list,
    * to remove sub-allocations before freeing it.
    */
   struct list_head link;

   /* Allocator this sub-allocation was made from */
   struct pvr_suballocator *allocator;

   /* Referenced backing buffer shared with other sub-allocations */
   struct pvr_bo *bo;

   /* Device virtual address of the start of this sub-allocation */
   pvr_dev_addr_t dev_addr;

   /* Byte offset of this sub-allocation within the backing buffer */
   uint64_t offset;

   /* Aligned size of this sub-allocation in bytes */
   uint32_t size;
};
/**
@ -90,6 +132,19 @@ void *pvr_bo_cpu_map(struct pvr_device *device, struct pvr_bo *bo);
void pvr_bo_cpu_unmap(struct pvr_device *device, struct pvr_bo *bo);
void pvr_bo_free(struct pvr_device *device, struct pvr_bo *bo);
/* Sub-allocator interface; see the definitions for detailed documentation.
 * Note: the fini parameter is named 'allocator' to match its definition.
 */
void pvr_bo_suballocator_init(struct pvr_suballocator *allocator,
                              struct pvr_winsys_heap *heap,
                              struct pvr_device *device,
                              uint32_t default_size);
void pvr_bo_suballocator_fini(struct pvr_suballocator *allocator);

VkResult pvr_bo_suballoc(struct pvr_suballocator *allocator,
                         uint32_t size,
                         uint32_t alignment,
                         bool zero_on_alloc,
                         struct pvr_suballoc_bo **const suballoc_bo_out);
void pvr_bo_suballoc_free(struct pvr_suballoc_bo *suballoc_bo);

void *pvr_bo_suballoc_get_map_addr(const struct pvr_suballoc_bo *suballoc_bo);
#if defined(HAVE_VALGRIND)
void *pvr_bo_cpu_map_unchanged(struct pvr_device *device,
struct pvr_bo *pvr_bo);