anv: Add the base infrastructure to support a memory pool

Allocating larger buffers allows the KMD/HW to enable optimizations
that make access to memory faster. Also, because of the minimum
alignment required in some cases, we allocate 4k or 64k buffers for
usages that only need a few bytes, wasting a lot of memory.

A memory pool takes care of both of those things, and here I'm
adding the base infrastructure to implement this feature.
The next patch will implement the functions in anv_slab_bo.c; the
feature is split in two to make review easier.

The idea here is to take the same approach as Iris and use pb_slab.h.
In 99% of places it will be transparent that an anv_bo is actually
a slab of a larger, real anv_bo; the remaining 1% of places are
handled here.
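
For context, the pb_slab entry points this approach leans on look
roughly like the following. This is paraphrased from memory of
util/pb_slab.h, not copied from it; check the header for the exact
signatures:

    /* Paraphrase of util/pb_slab.h; see the header for the real API.
     *
     * A slab is carved into same-sized entries. The driver provides
     * callbacks to allocate/free the backing slab BO and to tell when
     * an entry is idle and can be reused.
     */
    bool pb_slabs_init(struct pb_slabs *slabs,
                       unsigned min_order, unsigned max_order,
                       unsigned num_heaps, bool allow_three_buckets,
                       void *priv,
                       slab_can_reclaim_fn *can_reclaim,
                       slab_alloc_fn *slab_alloc,
                       slab_free_fn *slab_free);

    /* Returns an entry of at least `size` from `heap`, growing a new
     * slab through the slab_alloc callback when needed.
     */
    struct pb_slab_entry *pb_slab_alloc(struct pb_slabs *slabs,
                                        unsigned size, unsigned heap);

    /* Entries are not destroyed immediately; they go on a reclaim list
     * and are reused once can_reclaim reports them idle.
     */
    void pb_slab_free(struct pb_slabs *slabs, struct pb_slab_entry *entry);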

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33558>


@@ -28,6 +28,7 @@
#include <sys/mman.h>
#include "anv_private.h"
#include "anv_slab_bo.h"
#include "common/intel_aux_map.h"
#include "util/anon_file.h"
@@ -1616,10 +1617,15 @@ anv_device_alloc_bo(struct anv_device *device,
if (device->info->has_llc && ((alloc_flags & not_allowed_promotion) == 0))
alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
const uint32_t bo_flags =
device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags);
uint32_t alignment = anv_bo_vma_calc_alignment_requirement(device, alloc_flags, size);
/* Calling it here to avoid the 4k size promotion below; we can only do
 * that because ANV_BO_ALLOC_AUX_CCS is not supported by slabs.
 */
*bo_out = anv_slab_bo_alloc(device, name, size, alignment, alloc_flags);
if (*bo_out)
return VK_SUCCESS;
/* The kernel is going to give us whole pages anyway. */
size = align64(size, 4096);
@@ -1673,7 +1679,7 @@ anv_device_alloc_bo(struct anv_device *device,
.size = size,
.ccs_offset = ccs_offset,
.actual_size = actual_size,
.flags = bo_flags,
.flags = device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags),
.alloc_flags = alloc_flags,
};
@@ -1726,6 +1732,25 @@ anv_device_map_bo(struct anv_device *device,
assert(!bo->from_host_ptr);
assert(size > 0);
struct anv_bo *real = anv_bo_get_real(bo);
uint64_t offset_adjustment = 0;
if (real != bo) {
offset += (bo->offset - real->offset);
/* The KMD rounds munmap() to whole pages, so do some adjustments here. */
const uint64_t munmap_offset = ROUND_DOWN_TO(offset, 4096);
if (munmap_offset != offset) {
offset_adjustment = offset - munmap_offset;
size += offset_adjustment;
offset = munmap_offset;
if (placed_addr)
placed_addr -= offset_adjustment;
}
assert((offset & (4096 - 1)) == 0);
}
void *map = device->kmd_backend->gem_mmap(device, bo, offset, size, placed_addr);
if (unlikely(map == MAP_FAILED))
return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m");
@@ -1736,7 +1761,7 @@ anv_device_map_bo(struct anv_device *device,
VG(VALGRIND_MALLOCLIKE_BLOCK(map, size, 0, 1));
if (map_out)
*map_out = map;
*map_out = map + offset_adjustment;
return VK_SUCCESS;
}
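
(Not part of the patch.) To make the rounding concrete, a standalone
sketch with illustrative numbers:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       /* Illustrative: a slab entry 100 bytes past a page boundary
        * inside its parent BO, mapped for 256 bytes.
        */
       uint64_t offset = 4096 + 100;  /* parent-relative offset to map */
       uint64_t size = 256;

       /* mmap() offsets must be page aligned: round down and remember
        * the delta; the pointer handed back to the caller is
        * map + offset_adjustment.
        */
       uint64_t munmap_offset = offset & ~(uint64_t)4095;   /* 4096 */
       uint64_t offset_adjustment = offset - munmap_offset; /* 100 */
       size += offset_adjustment;  /* map 356 bytes from the boundary */
       offset = munmap_offset;

       assert((offset & 4095) == 0);
       (void)size;
       return 0;
    }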
@@ -1749,6 +1774,18 @@ anv_device_unmap_bo(struct anv_device *device,
{
assert(!bo->from_host_ptr);
struct anv_bo *real = anv_bo_get_real(bo);
if (real != bo) {
uint64_t slab_offset = bo->offset - real->offset;
if (ROUND_DOWN_TO(slab_offset, 4096) != slab_offset) {
slab_offset -= ROUND_DOWN_TO(slab_offset, 4096);
map -= slab_offset;
map_size += slab_offset;
}
assert(((uintptr_t)map & (4096 - 1)) == 0);
}
if (replace) {
map = mmap(map, map_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
@@ -2014,6 +2051,7 @@ anv_device_set_bo_tiling(struct anv_device *device,
uint32_t row_pitch_B,
enum isl_tiling tiling)
{
assert(bo->slab_parent == NULL);
int ret = anv_gem_set_tiling(device, bo->gem_handle, row_pitch_B,
isl_tiling_to_i915_tiling(tiling));
if (ret) {
@@ -2049,8 +2087,6 @@ anv_device_release_bo(struct anv_device *device,
struct anv_bo_cache *cache = &device->bo_cache;
const bool bo_is_xe_userptr = device->info->kmd_type == INTEL_KMD_TYPE_XE &&
bo->from_host_ptr;
assert(bo_is_xe_userptr ||
anv_device_lookup_bo(device, bo->gem_handle) == bo);
/* Try to decrement the counter but don't go below one. If this succeeds
* then the refcount has been decremented and we are not the last
@@ -2059,8 +2095,6 @@ anv_device_release_bo(struct anv_device *device,
if (atomic_dec_not_one(&bo->refcount))
return;
ANV_RMV(bo_destroy, device, bo);
pthread_mutex_lock(&cache->mutex);
/* We are probably the last reference since our attempt to decrement above
@@ -2075,6 +2109,17 @@ anv_device_release_bo(struct anv_device *device,
}
assert(bo->refcount == 0);
if (bo->slab_parent) {
pthread_mutex_unlock(&cache->mutex);
anv_slab_bo_free(device, bo);
return;
}
assert(bo_is_xe_userptr ||
anv_device_lookup_bo(device, bo->gem_handle) == bo);
ANV_RMV(bo_destroy, device, bo);
/* Memset the BO just in case. The refcount being zero should be enough to
* prevent someone from assuming the data is valid but it's safer to just
* stomp to zero just in case. We explicitly do this *before* we actually


@@ -31,6 +31,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "anv_slab_bo.h"
#include "util/u_debug.h"
#include "util/os_file.h"
#include "util/os_misc.h"
@@ -485,9 +486,12 @@ VkResult anv_CreateDevice(
list_inithead(&device->image_private_objects);
list_inithead(&device->bvh_dumps);
if (!anv_slab_bo_init(device))
goto fail_vmas;
if (pthread_mutex_init(&device->mutex, NULL) != 0) {
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_vmas;
goto fail_slab;
}
pthread_condattr_t condattr;
@@ -1126,6 +1130,8 @@ VkResult anv_CreateDevice(
pthread_cond_destroy(&device->queue_submit);
fail_mutex:
pthread_mutex_destroy(&device->mutex);
fail_slab:
anv_slab_bo_deinit(device);
fail_vmas:
util_vma_heap_finish(&device->vma_trtt);
util_vma_heap_finish(&device->vma_dynamic_visible);
@@ -1278,6 +1284,7 @@ void anv_DestroyDevice(
anv_bo_pool_finish(&device->bvh_bo_pool);
anv_bo_pool_finish(&device->batch_bo_pool);
anv_slab_bo_deinit(device);
anv_bo_cache_finish(&device->bo_cache);
util_vma_heap_finish(&device->vma_trtt);
@@ -1898,7 +1905,7 @@ VkResult anv_MapMemory2KHR(
}
uint64_t map_offset, map_size;
anv_sanitize_map_params(device, offset, size, &map_offset, &map_size);
anv_sanitize_map_params(device, mem->bo, offset, size, &map_offset, &map_size);
void *map;
VkResult result = anv_device_map_bo(device, mem->bo, map_offset,


@@ -2609,7 +2609,7 @@ anv_image_bind_address(struct anv_device *device,
uint64_t offset = image->bindings[binding].address.offset +
image->bindings[binding].memory_range.offset;
uint64_t map_offset, map_size;
anv_sanitize_map_params(device, offset,
anv_sanitize_map_params(device, image->bindings[binding].address.bo, offset,
image->bindings[binding].memory_range.size,
&map_offset, &map_size);


@@ -61,6 +61,7 @@
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/pb_slab.h"
#include "util/perf/u_trace.h"
#include "util/set.h"
#include "util/sparse_array.h"
@@ -514,6 +515,10 @@ struct anv_bo {
enum anv_bo_alloc_flags alloc_flags;
/** If slab_parent is set, this bo is a slab */
struct anv_bo *slab_parent;
struct pb_slab_entry slab_entry;
/** True if this BO wraps a host pointer */
bool from_host_ptr:1;
@@ -521,6 +526,13 @@ struct anv_bo {
bool gtt_mapped:1;
};
/* If bo is a slab, return the real/slab_parent bo */
static inline struct anv_bo *
anv_bo_get_real(struct anv_bo *bo)
{
return bo->slab_parent ? bo->slab_parent : bo;
}
static inline bool
anv_bo_is_external(const struct anv_bo *bo)
{
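
The i915 and Xe batch-chain hunks below rely on this helper: anywhere
the kernel-visible object is needed, a slab entry defers to its parent.
A hypothetical one-liner to show the pattern (gem_handle_for_bo is not
part of the patch):

    /* Hypothetical, for illustration only: slab entries share their
     * parent's GEM handle, so kernel-facing code looks through them.
     */
    static inline uint32_t
    gem_handle_for_bo(struct anv_bo *bo)
    {
       return anv_bo_get_real(bo)->gem_handle;
    }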
@@ -2172,6 +2184,8 @@ struct anv_device {
} accel_struct_build;
struct vk_meta_device meta_device;
struct pb_slabs bo_slabs[3];
};
static inline uint32_t
@@ -2260,6 +2274,7 @@ void anv_device_finish_blorp(struct anv_device *device);
static inline void
anv_sanitize_map_params(struct anv_device *device,
struct anv_bo *bo,
uint64_t in_offset,
uint64_t in_size,
uint64_t *out_offset,
@@ -2273,6 +2288,12 @@ anv_sanitize_map_params(struct anv_device *device,
assert(in_offset >= *out_offset);
*out_size = (in_offset + in_size) - *out_offset;
/* Don't round up slab bos, so that mmap() of slabs at the end of the
 * slab parent doesn't fail; all the adjustment for slabs is done in
 * anv_device_map_bo().
 */
if (anv_bo_get_real(bo) != bo)
return;
/* Let's map whole pages */
*out_size = align64(*out_size, 4096);
}
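
The reason not to round up for slabs: an entry at the very end of the
parent would otherwise request bytes past the parent BO. A standalone
sketch with illustrative numbers (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       /* A 64 KiB slab parent whose last 256-byte entry sits right at
        * the end: rounding the 256-byte map up to a whole page would
        * ask gem_mmap() for bytes past the parent BO.
        */
       uint64_t parent_size = 64 * 1024;
       uint64_t entry_offset = parent_size - 256;
       uint64_t rounded_size = (256 + 4095) & ~(uint64_t)4095; /* 4096 */
       assert(entry_offset + rounded_size > parent_size); /* overrun */
       return 0;
    }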


@@ -0,0 +1,28 @@
/* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "anv_slab_bo.h"
struct anv_bo *
anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t requested_size,
uint32_t alignment, enum anv_bo_alloc_flags alloc_flags)
{
return NULL;
}
void
anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo)
{
}
bool
anv_slab_bo_init(struct anv_device *device)
{
return true;
}
void
anv_slab_bo_deinit(struct anv_device *device)
{
}
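
These stubs keep the patch compiling; the follow-up fills them in.
Purely as a sketch of where that is headed, following the Iris pattern
(every name and detail below is an assumption, not the actual
follow-up code):

    /* Hypothetical sketch only, not the real follow-up implementation. */
    struct anv_bo *
    anv_slab_bo_alloc(struct anv_device *device, const char *name,
                      uint64_t size, uint32_t alignment,
                      enum anv_bo_alloc_flags alloc_flags)
    {
       /* Only small, non-external allocations are worth slabbing;
        * returning NULL makes anv_device_alloc_bo() fall back to
        * allocating a real BO.
        */
       if (alloc_flags & ANV_BO_ALLOC_EXTERNAL)
          return NULL;

       struct pb_slab_entry *entry =
          pb_slab_alloc(&device->bo_slabs[0], MAX2(size, alignment),
                        0 /* heap */);
       if (!entry)
          return NULL;

       /* The slab_alloc callback created the parent BO and initialized
        * each entry's anv_bo, including slab_parent and offset.
        */
       return container_of(entry, struct anv_bo, slab_entry);
    }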


@@ -0,0 +1,15 @@
/* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include "anv_private.h"
bool anv_slab_bo_init(struct anv_device *device);
void anv_slab_bo_deinit(struct anv_device *device);
struct anv_bo *
anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t size,
uint32_t alignment, enum anv_bo_alloc_flags alloc_flags);
void anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo);


@@ -95,6 +95,7 @@ anv_execbuf_add_bo(struct anv_device *device,
{
struct drm_i915_gem_exec_object2 *obj = NULL;
bo = anv_bo_get_real(bo);
if (bo->exec_obj_index < exec->bo_count &&
exec->bos[bo->exec_obj_index] == bo)
obj = &exec->objects[bo->exec_obj_index];


@@ -204,6 +204,8 @@ libanv_files = files(
'anv_rmv.c',
'anv_rmv.h',
'anv_sampler.c',
'anv_slab_bo.c',
'anv_slab_bo.h',
'anv_sparse.c',
'anv_util.c',
'anv_utrace.c',


@@ -154,10 +154,12 @@ anv_vm_bind_to_drm_xe_vm_bind(struct anv_device *device,
struct anv_bo *bo = anv_bind->bo;
uint16_t pat_index = bo ?
anv_device_get_pat_entry(device, bo->alloc_flags)->index : 0;
/* offset from real bo is needed for sparse bindings */
uint64_t obj_offset = (bo ? bo->offset - anv_bo_get_real(bo)->offset : 0) + anv_bind->bo_offset;
struct drm_xe_vm_bind_op xe_bind = {
.obj = 0,
.obj_offset = anv_bind->bo_offset,
.obj_offset = obj_offset,
.range = anv_bind->size,
.addr = intel_48b_address(anv_bind->address),
.op = DRM_XE_VM_BIND_OP_UNMAP,
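
Since both bo->offset and the parent's offset are GPU addresses, the
subtraction recovers the entry's placement inside the parent GEM
object. With illustrative numbers (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       /* Parent bound at GPU address 0x100000, slab entry placed at
        * 0x101000, caller binds 0x200 bytes into the entry.
        */
       uint64_t parent_offset = 0x100000;
       uint64_t slab_bo_offset = 0x101000;
       uint64_t bind_bo_offset = 0x200;
       uint64_t obj_offset =
          (slab_bo_offset - parent_offset) + bind_bo_offset;
       /* The binding targets byte 0x1200 of the parent GEM object. */
       assert(obj_offset == 0x1200);
       return 0;
    }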