From 3bf6d42fda02f54e6235d2c0fc6a855b2bd75a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 7 Feb 2025 04:55:18 -0800 Subject: [PATCH] anv: Add the base infrastructure to support memory pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocating larger buffers allows KMD/HW to enable optimizations that make access to memory faster. Also, because of the minimum alignment required in some cases, we allocate 4k or 64k long buffers for usages that only need a few bytes, wasting a lot of memory. Memory pool takes care of both of those things and here I'm adding the base infrastructure to implement this feature. The next patch will implement the functions in anv_slab_bo.c, splitting it in two to make review easier. The idea here is to take the same approach as Iris and use pb_slab.h. In 99% of the places it will be transparent that anv_bo is actually a slab of a larger and real anv_bo, the remaining 1% of the places are handled here. 
Reviewed-by: Lionel Landwerlin Signed-off-by: José Roberto de Souza Part-of: --- src/intel/vulkan/anv_allocator.c | 61 +++++++++++++++++++++---- src/intel/vulkan/anv_device.c | 11 ++++- src/intel/vulkan/anv_image.c | 2 +- src/intel/vulkan/anv_private.h | 21 +++++++++ src/intel/vulkan/anv_slab_bo.c | 28 ++++++++++++ src/intel/vulkan/anv_slab_bo.h | 15 ++++++ src/intel/vulkan/i915/anv_batch_chain.c | 1 + src/intel/vulkan/meson.build | 2 + src/intel/vulkan/xe/anv_kmd_backend.c | 4 +- 9 files changed, 133 insertions(+), 12 deletions(-) create mode 100644 src/intel/vulkan/anv_slab_bo.c create mode 100644 src/intel/vulkan/anv_slab_bo.h diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 6fd890d778f..c545eba5739 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -28,6 +28,7 @@ #include #include "anv_private.h" +#include "anv_slab_bo.h" #include "common/intel_aux_map.h" #include "util/anon_file.h" @@ -1616,10 +1617,15 @@ anv_device_alloc_bo(struct anv_device *device, if (device->info->has_llc && ((alloc_flags & not_allowed_promotion) == 0)) alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT; - const uint32_t bo_flags = - device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags); uint32_t alignment = anv_bo_vma_calc_alignment_requirement(device, alloc_flags, size); + /* calling in here to avoid the 4k size promotion but we can only do that + * because ANV_BO_ALLOC_AUX_CCS is not supported by slab + */ + *bo_out = anv_slab_bo_alloc(device, name, size, alignment, alloc_flags); + if (*bo_out) + return VK_SUCCESS; + /* The kernel is going to give us whole pages anyway. 
*/ size = align64(size, 4096); @@ -1673,7 +1679,7 @@ anv_device_alloc_bo(struct anv_device *device, .size = size, .ccs_offset = ccs_offset, .actual_size = actual_size, - .flags = bo_flags, + .flags = device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags), .alloc_flags = alloc_flags, }; @@ -1726,6 +1732,25 @@ anv_device_map_bo(struct anv_device *device, assert(!bo->from_host_ptr); assert(size > 0); + struct anv_bo *real = anv_bo_get_real(bo); + uint64_t offset_adjustment = 0; + if (real != bo) { + offset += (bo->offset - real->offset); + + /* KMD rounds munmap() to whole pages, so here doing some adjustments */ + const uint64_t munmap_offset = ROUND_DOWN_TO(offset, 4096); + if (munmap_offset != offset) { + offset_adjustment = offset - munmap_offset; + size += offset_adjustment; + offset = munmap_offset; + + if (placed_addr) + placed_addr -= offset_adjustment; + } + + assert((offset & (4096 - 1)) == 0); + } + void *map = device->kmd_backend->gem_mmap(device, bo, offset, size, placed_addr); if (unlikely(map == MAP_FAILED)) return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m"); @@ -1736,7 +1761,7 @@ anv_device_map_bo(struct anv_device *device, VG(VALGRIND_MALLOCLIKE_BLOCK(map, size, 0, 1)); if (map_out) - *map_out = map; + *map_out = map + offset_adjustment; return VK_SUCCESS; } @@ -1749,6 +1774,18 @@ anv_device_unmap_bo(struct anv_device *device, { assert(!bo->from_host_ptr); + struct anv_bo *real = anv_bo_get_real(bo); + if (real != bo) { + uint64_t slab_offset = bo->offset - real->offset; + + if (ROUND_DOWN_TO(slab_offset, 4096) != slab_offset) { + slab_offset -= ROUND_DOWN_TO(slab_offset, 4096); + map -= slab_offset; + map_size += slab_offset; + } + assert(((uintptr_t)map & (4096 - 1)) == 0); + } + if (replace) { map = mmap(map, map_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); @@ -2014,6 +2051,7 @@ anv_device_set_bo_tiling(struct anv_device *device, uint32_t row_pitch_B, enum isl_tiling tiling) { + 
assert(bo->slab_parent == NULL); int ret = anv_gem_set_tiling(device, bo->gem_handle, row_pitch_B, isl_tiling_to_i915_tiling(tiling)); if (ret) { @@ -2049,8 +2087,6 @@ anv_device_release_bo(struct anv_device *device, struct anv_bo_cache *cache = &device->bo_cache; const bool bo_is_xe_userptr = device->info->kmd_type == INTEL_KMD_TYPE_XE && bo->from_host_ptr; - assert(bo_is_xe_userptr || - anv_device_lookup_bo(device, bo->gem_handle) == bo); /* Try to decrement the counter but don't go below one. If this succeeds * then the refcount has been decremented and we are not the last @@ -2059,8 +2095,6 @@ anv_device_release_bo(struct anv_device *device, if (atomic_dec_not_one(&bo->refcount)) return; - ANV_RMV(bo_destroy, device, bo); - pthread_mutex_lock(&cache->mutex); /* We are probably the last reference since our attempt to decrement above @@ -2075,6 +2109,17 @@ anv_device_release_bo(struct anv_device *device, } assert(bo->refcount == 0); + if (bo->slab_parent) { + pthread_mutex_unlock(&cache->mutex); + anv_slab_bo_free(device, bo); + return; + } + + assert(bo_is_xe_userptr || + anv_device_lookup_bo(device, bo->gem_handle) == bo); + + ANV_RMV(bo_destroy, device, bo); + /* Memset the BO just in case. The refcount being zero should be enough to * prevent someone from assuming the data is valid but it's safer to just * stomp to zero just in case. 
We explicitly do this *before* we actually diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index e28a4ffec6e..c6c07e35414 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -31,6 +31,7 @@ #include "anv_private.h" #include "anv_measure.h" +#include "anv_slab_bo.h" #include "util/u_debug.h" #include "util/os_file.h" #include "util/os_misc.h" @@ -485,9 +486,12 @@ VkResult anv_CreateDevice( list_inithead(&device->image_private_objects); list_inithead(&device->bvh_dumps); + if (!anv_slab_bo_init(device)) + goto fail_vmas; + if (pthread_mutex_init(&device->mutex, NULL) != 0) { result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED); - goto fail_vmas; + goto fail_slab; } pthread_condattr_t condattr; @@ -1126,6 +1130,8 @@ VkResult anv_CreateDevice( pthread_cond_destroy(&device->queue_submit); fail_mutex: pthread_mutex_destroy(&device->mutex); +fail_slab: + anv_slab_bo_deinit(device); fail_vmas: util_vma_heap_finish(&device->vma_trtt); util_vma_heap_finish(&device->vma_dynamic_visible); @@ -1278,6 +1284,7 @@ void anv_DestroyDevice( anv_bo_pool_finish(&device->bvh_bo_pool); anv_bo_pool_finish(&device->batch_bo_pool); + anv_slab_bo_deinit(device); anv_bo_cache_finish(&device->bo_cache); util_vma_heap_finish(&device->vma_trtt); @@ -1898,7 +1905,7 @@ VkResult anv_MapMemory2KHR( } uint64_t map_offset, map_size; - anv_sanitize_map_params(device, offset, size, &map_offset, &map_size); + anv_sanitize_map_params(device, mem->bo, offset, size, &map_offset, &map_size); void *map; VkResult result = anv_device_map_bo(device, mem->bo, map_offset, diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 13ad31d08c7..36ee77a138c 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -2609,7 +2609,7 @@ anv_image_bind_address(struct anv_device *device, uint64_t offset = image->bindings[binding].address.offset + image->bindings[binding].memory_range.offset; uint64_t map_offset, map_size; 
- anv_sanitize_map_params(device, offset, + anv_sanitize_map_params(device, image->bindings[binding].address.bo, offset, image->bindings[binding].memory_range.size, &map_offset, &map_size); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 0c3cd5ca7ba..6e6d67851ce 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -61,6 +61,7 @@ #include "util/macros.h" #include "util/hash_table.h" #include "util/list.h" +#include "util/pb_slab.h" #include "util/perf/u_trace.h" #include "util/set.h" #include "util/sparse_array.h" @@ -514,6 +515,10 @@ struct anv_bo { enum anv_bo_alloc_flags alloc_flags; + /** If slab_parent is set, this bo is a slab */ + struct anv_bo *slab_parent; + struct pb_slab_entry slab_entry; + /** True if this BO wraps a host pointer */ bool from_host_ptr:1; @@ -521,6 +526,13 @@ struct anv_bo { bool gtt_mapped:1; }; +/* If bo is a slab, return the real/slab_parent bo */ +static inline struct anv_bo * +anv_bo_get_real(struct anv_bo *bo) +{ + return bo->slab_parent ? bo->slab_parent : bo; +} + static inline bool anv_bo_is_external(const struct anv_bo *bo) { @@ -2172,6 +2184,8 @@ struct anv_device { } accel_struct_build; struct vk_meta_device meta_device; + + struct pb_slabs bo_slabs[3]; }; static inline uint32_t @@ -2260,6 +2274,7 @@ void anv_device_finish_blorp(struct anv_device *device); static inline void anv_sanitize_map_params(struct anv_device *device, + struct anv_bo *bo, uint64_t in_offset, uint64_t in_size, uint64_t *out_offset, @@ -2273,6 +2288,12 @@ anv_sanitize_map_params(struct anv_device *device, assert(in_offset >= *out_offset); *out_size = (in_offset + in_size) - *out_offset; + /* Don't round up slab bos to not fail mmap() of slabs at the end of slab + * parent, all the adjustment for slabs will be done in anv_device_map_bo(). 
+ */ + if (anv_bo_get_real(bo) != bo) + return; + /* Let's map whole pages */ *out_size = align64(*out_size, 4096); } diff --git a/src/intel/vulkan/anv_slab_bo.c b/src/intel/vulkan/anv_slab_bo.c new file mode 100644 index 00000000000..19b2959d029 --- /dev/null +++ b/src/intel/vulkan/anv_slab_bo.c @@ -0,0 +1,28 @@ +/* Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "anv_slab_bo.h" + +struct anv_bo * +anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t requested_size, + uint32_t alignment, enum anv_bo_alloc_flags alloc_flags) +{ + return NULL; +} + +void +anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo) +{ +} + +bool +anv_slab_bo_init(struct anv_device *device) +{ + return true; +} + +void +anv_slab_bo_deinit(struct anv_device *device) +{ +} diff --git a/src/intel/vulkan/anv_slab_bo.h b/src/intel/vulkan/anv_slab_bo.h new file mode 100644 index 00000000000..03848183281 --- /dev/null +++ b/src/intel/vulkan/anv_slab_bo.h @@ -0,0 +1,15 @@ +/* Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "anv_private.h" + +bool anv_slab_bo_init(struct anv_device *device); +void anv_slab_bo_deinit(struct anv_device *device); + +struct anv_bo * +anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t size, + uint32_t alignment, enum anv_bo_alloc_flags alloc_flags); +void anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo); diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index 61a34e97959..eba46b284a5 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -95,6 +95,7 @@ anv_execbuf_add_bo(struct anv_device *device, { struct drm_i915_gem_exec_object2 *obj = NULL; + bo = anv_bo_get_real(bo); if (bo->exec_obj_index < exec->bo_count && exec->bos[bo->exec_obj_index] == bo) obj = &exec->objects[bo->exec_obj_index]; diff --git 
a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index 44c333dd4b9..f1f7463be79 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -204,6 +204,8 @@ libanv_files = files( 'anv_rmv.c', 'anv_rmv.h', 'anv_sampler.c', + 'anv_slab_bo.c', + 'anv_slab_bo.h', 'anv_sparse.c', 'anv_util.c', 'anv_utrace.c', diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c index 2ebd1bf94c2..c13352d6ad6 100644 --- a/src/intel/vulkan/xe/anv_kmd_backend.c +++ b/src/intel/vulkan/xe/anv_kmd_backend.c @@ -154,10 +154,12 @@ anv_vm_bind_to_drm_xe_vm_bind(struct anv_device *device, struct anv_bo *bo = anv_bind->bo; uint16_t pat_index = bo ? anv_device_get_pat_entry(device, bo->alloc_flags)->index : 0; + /* offset from real bo is needed for sparse bindings */ + uint64_t obj_offset = (bo ? bo->offset - anv_bo_get_real(bo)->offset : 0) + anv_bind->bo_offset; struct drm_xe_vm_bind_op xe_bind = { .obj = 0, - .obj_offset = anv_bind->bo_offset, + .obj_offset = obj_offset, .range = anv_bind->size, .addr = intel_48b_address(anv_bind->address), .op = DRM_XE_VM_BIND_OP_UNMAP,