diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 6fd890d778f..c545eba5739 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -28,6 +28,7 @@ #include #include "anv_private.h" +#include "anv_slab_bo.h" #include "common/intel_aux_map.h" #include "util/anon_file.h" @@ -1616,10 +1617,15 @@ anv_device_alloc_bo(struct anv_device *device, if (device->info->has_llc && ((alloc_flags & not_allowed_promotion) == 0)) alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT; - const uint32_t bo_flags = - device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags); uint32_t alignment = anv_bo_vma_calc_alignment_requirement(device, alloc_flags, size); + /* Call the slab allocator here, before the 4k size promotion below. We can + * only do that because ANV_BO_ALLOC_AUX_CCS is not supported by slabs. + */ + *bo_out = anv_slab_bo_alloc(device, name, size, alignment, alloc_flags); + if (*bo_out) + return VK_SUCCESS; + /* The kernel is going to give us whole pages anyway. 
*/ size = align64(size, 4096); @@ -1673,7 +1679,7 @@ anv_device_alloc_bo(struct anv_device *device, .size = size, .ccs_offset = ccs_offset, .actual_size = actual_size, - .flags = bo_flags, + .flags = device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags), .alloc_flags = alloc_flags, }; @@ -1726,6 +1732,25 @@ anv_device_map_bo(struct anv_device *device, assert(!bo->from_host_ptr); assert(size > 0); + struct anv_bo *real = anv_bo_get_real(bo); + uint64_t offset_adjustment = 0; + if (real != bo) { + offset += (bo->offset - real->offset); + + /* KMD rounds munmap() to whole pages, so adjust offset and size to match */ + const uint64_t munmap_offset = ROUND_DOWN_TO(offset, 4096); + if (munmap_offset != offset) { + offset_adjustment = offset - munmap_offset; + size += offset_adjustment; + offset = munmap_offset; + + if (placed_addr) + placed_addr -= offset_adjustment; + } + + assert((offset & (4096 - 1)) == 0); + } + void *map = device->kmd_backend->gem_mmap(device, bo, offset, size, placed_addr); if (unlikely(map == MAP_FAILED)) return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m"); @@ -1736,7 +1761,7 @@ anv_device_map_bo(struct anv_device *device, VG(VALGRIND_MALLOCLIKE_BLOCK(map, size, 0, 1)); if (map_out) - *map_out = map; + *map_out = map + offset_adjustment; return VK_SUCCESS; } @@ -1749,6 +1774,18 @@ anv_device_unmap_bo(struct anv_device *device, { assert(!bo->from_host_ptr); + struct anv_bo *real = anv_bo_get_real(bo); + if (real != bo) { + uint64_t slab_offset = bo->offset - real->offset; + + if (ROUND_DOWN_TO(slab_offset, 4096) != slab_offset) { + slab_offset -= ROUND_DOWN_TO(slab_offset, 4096); + map -= slab_offset; + map_size += slab_offset; + } + assert(((uintptr_t)map & (4096 - 1)) == 0); + } + if (replace) { map = mmap(map, map_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); @@ -2014,6 +2051,7 @@ anv_device_set_bo_tiling(struct anv_device *device, uint32_t row_pitch_B, enum isl_tiling tiling) { + 
assert(bo->slab_parent == NULL); int ret = anv_gem_set_tiling(device, bo->gem_handle, row_pitch_B, isl_tiling_to_i915_tiling(tiling)); if (ret) { @@ -2049,8 +2087,6 @@ anv_device_release_bo(struct anv_device *device, struct anv_bo_cache *cache = &device->bo_cache; const bool bo_is_xe_userptr = device->info->kmd_type == INTEL_KMD_TYPE_XE && bo->from_host_ptr; - assert(bo_is_xe_userptr || - anv_device_lookup_bo(device, bo->gem_handle) == bo); /* Try to decrement the counter but don't go below one. If this succeeds * then the refcount has been decremented and we are not the last @@ -2059,8 +2095,6 @@ anv_device_release_bo(struct anv_device *device, if (atomic_dec_not_one(&bo->refcount)) return; - ANV_RMV(bo_destroy, device, bo); - pthread_mutex_lock(&cache->mutex); /* We are probably the last reference since our attempt to decrement above @@ -2075,6 +2109,17 @@ anv_device_release_bo(struct anv_device *device, } assert(bo->refcount == 0); + if (bo->slab_parent) { + pthread_mutex_unlock(&cache->mutex); + anv_slab_bo_free(device, bo); + return; + } + + assert(bo_is_xe_userptr || + anv_device_lookup_bo(device, bo->gem_handle) == bo); + + ANV_RMV(bo_destroy, device, bo); + /* Memset the BO just in case. The refcount being zero should be enough to * prevent someone from assuming the data is valid but it's safer to just * stomp to zero just in case. 
We explicitly do this *before* we actually diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index e28a4ffec6e..c6c07e35414 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -31,6 +31,7 @@ #include "anv_private.h" #include "anv_measure.h" +#include "anv_slab_bo.h" #include "util/u_debug.h" #include "util/os_file.h" #include "util/os_misc.h" @@ -485,9 +486,12 @@ VkResult anv_CreateDevice( list_inithead(&device->image_private_objects); list_inithead(&device->bvh_dumps); + if (!anv_slab_bo_init(device)) + goto fail_vmas; + if (pthread_mutex_init(&device->mutex, NULL) != 0) { result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED); - goto fail_vmas; + goto fail_slab; } pthread_condattr_t condattr; @@ -1126,6 +1130,8 @@ VkResult anv_CreateDevice( pthread_cond_destroy(&device->queue_submit); fail_mutex: pthread_mutex_destroy(&device->mutex); +fail_slab: + anv_slab_bo_deinit(device); fail_vmas: util_vma_heap_finish(&device->vma_trtt); util_vma_heap_finish(&device->vma_dynamic_visible); @@ -1278,6 +1284,7 @@ void anv_DestroyDevice( anv_bo_pool_finish(&device->bvh_bo_pool); anv_bo_pool_finish(&device->batch_bo_pool); + anv_slab_bo_deinit(device); anv_bo_cache_finish(&device->bo_cache); util_vma_heap_finish(&device->vma_trtt); @@ -1898,7 +1905,7 @@ VkResult anv_MapMemory2KHR( } uint64_t map_offset, map_size; - anv_sanitize_map_params(device, offset, size, &map_offset, &map_size); + anv_sanitize_map_params(device, mem->bo, offset, size, &map_offset, &map_size); void *map; VkResult result = anv_device_map_bo(device, mem->bo, map_offset, diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 13ad31d08c7..36ee77a138c 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -2609,7 +2609,7 @@ anv_image_bind_address(struct anv_device *device, uint64_t offset = image->bindings[binding].address.offset + image->bindings[binding].memory_range.offset; uint64_t map_offset, map_size; 
- anv_sanitize_map_params(device, offset, + anv_sanitize_map_params(device, image->bindings[binding].address.bo, offset, image->bindings[binding].memory_range.size, &map_offset, &map_size); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 0c3cd5ca7ba..6e6d67851ce 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -61,6 +61,7 @@ #include "util/macros.h" #include "util/hash_table.h" #include "util/list.h" +#include "util/pb_slab.h" #include "util/perf/u_trace.h" #include "util/set.h" #include "util/sparse_array.h" @@ -514,6 +515,10 @@ struct anv_bo { enum anv_bo_alloc_flags alloc_flags; + /** If slab_parent is set, this bo is a slab */ + struct anv_bo *slab_parent; + struct pb_slab_entry slab_entry; + /** True if this BO wraps a host pointer */ bool from_host_ptr:1; @@ -521,6 +526,13 @@ struct anv_bo { bool gtt_mapped:1; }; +/* If bo is a slab, return the real/slab_parent bo */ +static inline struct anv_bo * +anv_bo_get_real(struct anv_bo *bo) +{ + return bo->slab_parent ? bo->slab_parent : bo; +} + static inline bool anv_bo_is_external(const struct anv_bo *bo) { @@ -2172,6 +2184,8 @@ struct anv_device { } accel_struct_build; struct vk_meta_device meta_device; + + struct pb_slabs bo_slabs[3]; }; static inline uint32_t @@ -2260,6 +2274,7 @@ void anv_device_finish_blorp(struct anv_device *device); static inline void anv_sanitize_map_params(struct anv_device *device, + struct anv_bo *bo, uint64_t in_offset, uint64_t in_size, uint64_t *out_offset, @@ -2273,6 +2288,12 @@ anv_sanitize_map_params(struct anv_device *device, assert(in_offset >= *out_offset); *out_size = (in_offset + in_size) - *out_offset; + /* Don't round up slab bos, so mmap() of a slab at the end of its parent + * doesn't fail. All adjustments for slabs are done in anv_device_map_bo(). 
+ */ + if (anv_bo_get_real(bo) != bo) + return; + /* Let's map whole pages */ *out_size = align64(*out_size, 4096); } diff --git a/src/intel/vulkan/anv_slab_bo.c b/src/intel/vulkan/anv_slab_bo.c new file mode 100644 index 00000000000..19b2959d029 --- /dev/null +++ b/src/intel/vulkan/anv_slab_bo.c @@ -0,0 +1,28 @@ +/* Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "anv_slab_bo.h" + +struct anv_bo * +anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t requested_size, + uint32_t alignment, enum anv_bo_alloc_flags alloc_flags) +{ + return NULL; +} + +void +anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo) +{ +} + +bool +anv_slab_bo_init(struct anv_device *device) +{ + return true; +} + +void +anv_slab_bo_deinit(struct anv_device *device) +{ +} diff --git a/src/intel/vulkan/anv_slab_bo.h b/src/intel/vulkan/anv_slab_bo.h new file mode 100644 index 00000000000..03848183281 --- /dev/null +++ b/src/intel/vulkan/anv_slab_bo.h @@ -0,0 +1,15 @@ +/* Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "anv_private.h" + +bool anv_slab_bo_init(struct anv_device *device); +void anv_slab_bo_deinit(struct anv_device *device); + +struct anv_bo * +anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t size, + uint32_t alignment, enum anv_bo_alloc_flags alloc_flags); +void anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo); diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index 61a34e97959..eba46b284a5 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -95,6 +95,7 @@ anv_execbuf_add_bo(struct anv_device *device, { struct drm_i915_gem_exec_object2 *obj = NULL; + bo = anv_bo_get_real(bo); if (bo->exec_obj_index < exec->bo_count && exec->bos[bo->exec_obj_index] == bo) obj = &exec->objects[bo->exec_obj_index]; diff --git 
a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index 44c333dd4b9..f1f7463be79 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -204,6 +204,8 @@ libanv_files = files( 'anv_rmv.c', 'anv_rmv.h', 'anv_sampler.c', + 'anv_slab_bo.c', + 'anv_slab_bo.h', 'anv_sparse.c', 'anv_util.c', 'anv_utrace.c', diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c index 2ebd1bf94c2..c13352d6ad6 100644 --- a/src/intel/vulkan/xe/anv_kmd_backend.c +++ b/src/intel/vulkan/xe/anv_kmd_backend.c @@ -154,10 +154,12 @@ anv_vm_bind_to_drm_xe_vm_bind(struct anv_device *device, struct anv_bo *bo = anv_bind->bo; uint16_t pat_index = bo ? anv_device_get_pat_entry(device, bo->alloc_flags)->index : 0; + /* offset from real bo is needed for sparse bindings */ + uint64_t obj_offset = (bo ? bo->offset - anv_bo_get_real(bo)->offset : 0) + anv_bind->bo_offset; struct drm_xe_vm_bind_op xe_bind = { .obj = 0, - .obj_offset = anv_bind->bo_offset, + .obj_offset = obj_offset, .range = anv_bind->size, .addr = intel_48b_address(anv_bind->address), .op = DRM_XE_VM_BIND_OP_UNMAP,