From c97f9193ef5d99ec8dae90d92671be8a687c2cbe Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Thu, 23 May 2024 13:03:39 -0700 Subject: [PATCH] venus: drop internal memory pools This exists due to historical limitations which have long gone obsolete. This persists longer due to hostorical perf issues that have recently gone obsolete on the platforms shipping Venus. Meanwhile, clients like skiavk and ANGLE nowadays do a better job managing suballocations. The tiny perf win from having this giant internal pool has been beaten by the memory waste, longer one-shot jank due to largier alloc, allocations no need to be mapped but only because host-visible is advertised across mem types and varies workarounds and markups needed to make alignment work and make VVL happy. Dropping it also reduces the maintenance cost. Signed-off-by: Yiwei Zhang Part-of: --- src/virtio/vulkan/vn_buffer.c | 17 +- src/virtio/vulkan/vn_common.c | 1 - src/virtio/vulkan/vn_common.h | 1 - src/virtio/vulkan/vn_device.c | 15 +- src/virtio/vulkan/vn_device.h | 2 - src/virtio/vulkan/vn_device_memory.c | 234 ++------------------------- src/virtio/vulkan/vn_device_memory.h | 13 -- src/virtio/vulkan/vn_image.c | 9 -- src/virtio/vulkan/vn_queue.c | 2 +- 9 files changed, 14 insertions(+), 280 deletions(-) diff --git a/src/virtio/vulkan/vn_buffer.c b/src/virtio/vulkan/vn_buffer.c index 337cab231d2..d4837c401ea 100644 --- a/src/virtio/vulkan/vn_buffer.c +++ b/src/virtio/vulkan/vn_buffer.c @@ -451,24 +451,9 @@ vn_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfo *pBindInfos) { - STACK_ARRAY(VkBindBufferMemoryInfo, bind_infos, bindInfoCount); - typed_memcpy(bind_infos, pBindInfos, bindInfoCount); - - for (uint32_t i = 0; i < bindInfoCount; i++) { - VkBindBufferMemoryInfo *info = &bind_infos[i]; - struct vn_device_memory *mem = - vn_device_memory_from_handle(info->memory); - if (mem->base_memory) { - info->memory = vn_device_memory_to_handle(mem->base_memory); - info->memoryOffset += mem->base_offset; - } - } - struct vn_device *dev = vn_device_from_handle(device); vn_async_vkBindBufferMemory2(dev->primary_ring, device, bindInfoCount, - bind_infos); - - STACK_ARRAY_FINISH(bind_infos); + pBindInfos); return VK_SUCCESS; } diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c index 4d22f184487..0fb0312cf15 100644 --- a/src/virtio/vulkan/vn_common.c +++ b/src/virtio/vulkan/vn_common.c @@ -43,7 +43,6 @@ static const struct debug_control vn_perf_options[] = { { "no_async_queue_submit", VN_PERF_NO_ASYNC_QUEUE_SUBMIT }, { "no_event_feedback", VN_PERF_NO_EVENT_FEEDBACK }, { "no_fence_feedback", VN_PERF_NO_FENCE_FEEDBACK }, - { "no_memory_suballoc", VN_PERF_NO_MEMORY_SUBALLOC }, { "no_cmd_batching", VN_PERF_NO_CMD_BATCHING }, { "no_semaphore_feedback", VN_PERF_NO_SEMAPHORE_FEEDBACK }, { "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK }, diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h index 2d5f222ab0a..21cd82d4a7f 100644 --- a/src/virtio/vulkan/vn_common.h +++ b/src/virtio/vulkan/vn_common.h @@ -120,7 +120,6 @@ enum vn_perf { VN_PERF_NO_ASYNC_QUEUE_SUBMIT = 1ull << 2, VN_PERF_NO_EVENT_FEEDBACK = 1ull << 3, VN_PERF_NO_FENCE_FEEDBACK = 1ull << 4, - VN_PERF_NO_MEMORY_SUBALLOC = 1ull << 5, VN_PERF_NO_CMD_BATCHING = 1ull << 6, VN_PERF_NO_SEMAPHORE_FEEDBACK = 1ull << 7, VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8, diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c index e7e1ca61c4c..a21e10a8526 100644 --- a/src/virtio/vulkan/vn_device.c +++ b/src/virtio/vulkan/vn_device.c @@ -480,14 +480,9 @@ vn_device_init(struct vn_device *dev, goto out_memory_report_fini; } - for (uint32_t i = 0; i < ARRAY_SIZE(dev->memory_pools); i++) { - struct vn_device_memory_pool *pool = &dev->memory_pools[i]; - mtx_init(&pool->mutex, mtx_plain); - } - result = vn_device_feedback_pool_init(dev); if (result != VK_SUCCESS) - goto out_memory_pool_fini; + goto out_queue_family_fini; result = vn_feedback_cmd_pools_init(dev); if (result != VK_SUCCESS) @@ -513,10 +508,7 @@ out_feedback_cmd_pools_fini: out_feedback_pool_fini: vn_device_feedback_pool_fini(dev); -out_memory_pool_fini: - for (uint32_t i = 0; i < ARRAY_SIZE(dev->memory_pools); i++) - vn_device_memory_pool_fini(dev, i); - +out_queue_family_fini: vn_device_queue_family_fini(dev); out_memory_report_fini: @@ -600,9 +592,6 @@ vn_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) vn_device_feedback_pool_fini(dev); - for (uint32_t i = 0; i < ARRAY_SIZE(dev->memory_pools); i++) - vn_device_memory_pool_fini(dev, i); - vn_device_queue_family_fini(dev); vn_device_memory_report_fini(dev); diff --git a/src/virtio/vulkan/vn_device.h b/src/virtio/vulkan/vn_device.h index d80aa0c5a51..ec640b4b5d9 100644 --- a/src/virtio/vulkan/vn_device.h +++ b/src/virtio/vulkan/vn_device.h @@ -39,8 +39,6 @@ struct vn_device { uint32_t *queue_families; uint32_t queue_family_count; - struct vn_device_memory_pool memory_pools[VK_MAX_MEMORY_TYPES]; - struct vn_feedback_pool feedback_pool; /* feedback cmd pool per queue family used by the device diff --git a/src/virtio/vulkan/vn_device_memory.c b/src/virtio/vulkan/vn_device_memory.c index 1317a41d065..e445c11c993 100644 --- a/src/virtio/vulkan/vn_device_memory.c +++ b/src/virtio/vulkan/vn_device_memory.c @@ -105,204 +105,6 @@ vn_device_memory_bo_fini(struct vn_device *dev, struct vn_device_memory *mem) } } -static VkResult -vn_device_memory_pool_grow_alloc(struct vn_device *dev, - uint32_t mem_type_index, - VkDeviceSize size, - struct vn_device_memory **out_mem) -{ - const VkMemoryAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = size, - .memoryTypeIndex = mem_type_index, - }; - struct vn_device_memory *mem = vk_device_memory_create( - &dev->base.base, &alloc_info, NULL, sizeof(*mem)); - if (!mem) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - vn_object_set_id(mem, vn_get_next_obj_id(), VK_OBJECT_TYPE_DEVICE_MEMORY); - - VkResult result = vn_device_memory_alloc_simple(dev, mem, &alloc_info); - if (result != VK_SUCCESS) - goto obj_fini; - - result = vn_device_memory_bo_init(dev, mem); - if (result != VK_SUCCESS) - goto mem_free; - - result = - vn_ring_submit_roundtrip(dev->primary_ring, &mem->bo_roundtrip_seqno); - if (result != VK_SUCCESS) - goto bo_unref; - - mem->bo_roundtrip_seqno_valid = true; - *out_mem = mem; - - return VK_SUCCESS; - -bo_unref: - vn_renderer_bo_unref(dev->renderer, mem->base_bo); -mem_free: - vn_device_memory_free_simple(dev, mem); -obj_fini: - vk_device_memory_destroy(&dev->base.base, NULL, &mem->base.base); - return result; -} - -static struct vn_device_memory * -vn_device_memory_pool_ref(struct vn_device *dev, - struct vn_device_memory *pool_mem) -{ - assert(pool_mem->base_bo); - - vn_renderer_bo_ref(dev->renderer, pool_mem->base_bo); - - return pool_mem; -} - -static void -vn_device_memory_pool_unref(struct vn_device *dev, - struct vn_device_memory *pool_mem) -{ - assert(pool_mem->base_bo); - - if (!vn_renderer_bo_unref(dev->renderer, pool_mem->base_bo)) - return; - - /* wait on valid bo_roundtrip_seqno before vkFreeMemory */ - if (pool_mem->bo_roundtrip_seqno_valid) - vn_ring_wait_roundtrip(dev->primary_ring, pool_mem->bo_roundtrip_seqno); - - vn_device_memory_free_simple(dev, pool_mem); - vk_device_memory_destroy(&dev->base.base, NULL, &pool_mem->base.base); -} - -void -vn_device_memory_pool_fini(struct vn_device *dev, uint32_t mem_type_index) -{ - struct vn_device_memory_pool *pool = &dev->memory_pools[mem_type_index]; - if (pool->memory) - vn_device_memory_pool_unref(dev, pool->memory); - mtx_destroy(&pool->mutex); -} - -static VkResult -vn_device_memory_pool_grow_locked(struct vn_device *dev, - uint32_t mem_type_index, - VkDeviceSize size) -{ - struct vn_device_memory *mem; - VkResult result = - vn_device_memory_pool_grow_alloc(dev, mem_type_index, size, &mem); - if (result != VK_SUCCESS) - return result; - - struct vn_device_memory_pool *pool = &dev->memory_pools[mem_type_index]; - if (pool->memory) - vn_device_memory_pool_unref(dev, pool->memory); - - pool->memory = mem; - pool->used = 0; - - return VK_SUCCESS; -} - -static VkResult -vn_device_memory_pool_suballocate(struct vn_device *dev, - struct vn_device_memory *mem) -{ - static const VkDeviceSize pool_size = 16 * 1024 * 1024; - /* TODO fix https://gitlab.freedesktop.org/mesa/mesa/-/issues/9351 - * Before that, we use 64K default alignment because some GPUs have 64K - * pages. It is also required by newer Intel GPUs. Meanwhile, use prior 4K - * align on implementations known to fit. - */ - const bool is_renderer_mali = dev->physical_device->renderer_driver_id == - VK_DRIVER_ID_ARM_PROPRIETARY; - const VkDeviceSize pool_align = is_renderer_mali ? 4096 : 64 * 1024; - const struct vk_device_memory *mem_vk = &mem->base.base; - struct vn_device_memory_pool *pool = - &dev->memory_pools[mem_vk->memory_type_index]; - - assert(mem_vk->size <= pool_size); - - mtx_lock(&pool->mutex); - - if (!pool->memory || pool->used + mem_vk->size > pool_size) { - VkResult result = vn_device_memory_pool_grow_locked( - dev, mem_vk->memory_type_index, pool_size); - if (result != VK_SUCCESS) { - mtx_unlock(&pool->mutex); - return result; - } - } - - mem->base_memory = vn_device_memory_pool_ref(dev, pool->memory); - - /* point mem->base_bo at pool base_bo and assign base_offset accordingly */ - mem->base_bo = pool->memory->base_bo; - mem->base_offset = pool->used; - pool->used += align64(mem_vk->size, pool_align); - - mtx_unlock(&pool->mutex); - - return VK_SUCCESS; -} - -static bool -vn_device_memory_should_suballocate(const struct vn_device *dev, - const VkMemoryAllocateInfo *alloc_info) -{ - if (VN_PERF(NO_MEMORY_SUBALLOC)) - return false; - - if (dev->renderer->info.has_guest_vram) - return false; - - /* We should not support suballocations because apps can do better. But - * each BO takes up a KVM memslot currently and some CTS tests exhausts - * them. This might not be needed on newer (host) kernels where there are - * many more KVM memslots. - */ - - /* consider host-visible memory only */ - const VkMemoryType *mem_type = - &dev->physical_device->memory_properties - .memoryTypes[alloc_info->memoryTypeIndex]; - if (!(mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) - return false; - - /* reject larger allocations */ - if (alloc_info->allocationSize > 64 * 1024) - return false; - - /* reject if there is any pnext struct other than - * VkMemoryDedicatedAllocateInfo, or if dedicated allocation is required - */ - if (alloc_info->pNext) { - const VkMemoryDedicatedAllocateInfo *dedicated = alloc_info->pNext; - if (dedicated->sType != - VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO || - dedicated->pNext) - return false; - - const struct vn_image *img = vn_image_from_handle(dedicated->image); - if (img) { - for (uint32_t i = 0; i < ARRAY_SIZE(img->requirements); i++) { - if (img->requirements[i].dedicated.requiresDedicatedAllocation) - return false; - } - } - - const struct vn_buffer *buf = vn_buffer_from_handle(dedicated->buffer); - if (buf && buf->requirements.dedicated.requiresDedicatedAllocation) - return false; - } - - return true; -} - VkResult vn_device_memory_import_dma_buf(struct vn_device *dev, struct vn_device_memory *mem, @@ -593,8 +395,6 @@ vn_AllocateMemory(VkDevice device, } else if (import_fd_info) { result = vn_device_memory_import_dma_buf(dev, mem, pAllocateInfo, false, import_fd_info->fd); - } else if (vn_device_memory_should_suballocate(dev, pAllocateInfo)) { - result = vn_device_memory_pool_suballocate(dev, mem); } else { result = vn_device_memory_alloc(dev, mem, pAllocateInfo); } @@ -623,18 +423,13 @@ vn_FreeMemory(VkDevice device, vn_device_memory_emit_report(dev, mem, /* is_alloc */ false, VK_SUCCESS); - if (mem->base_memory) { - vn_device_memory_pool_unref(dev, mem->base_memory); - } else { - /* ensure renderer side import still sees the resource */ - vn_device_memory_bo_fini(dev, mem); + /* ensure renderer side import still sees the resource */ + vn_device_memory_bo_fini(dev, mem); - if (mem->bo_roundtrip_seqno_valid) - vn_ring_wait_roundtrip(dev->primary_ring, mem->bo_roundtrip_seqno); - - vn_device_memory_free_simple(dev, mem); - } + if (mem->bo_roundtrip_seqno_valid) + vn_ring_wait_roundtrip(dev->primary_ring, mem->bo_roundtrip_seqno); + vn_device_memory_free_simple(dev, mem); vk_device_memory_destroy(&dev->base.base, pAllocator, &mem->base.base); } @@ -643,10 +438,6 @@ vn_GetDeviceMemoryOpaqueCaptureAddress( VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo) { struct vn_device *dev = vn_device_from_handle(device); - ASSERTED struct vn_device_memory *mem = - vn_device_memory_from_handle(pInfo->memory); - - assert(!mem->base_memory); return vn_call_vkGetDeviceMemoryOpaqueCaptureAddress(dev->primary_ring, device, pInfo); } @@ -702,7 +493,7 @@ vn_MapMemory(VkDevice device, mem->map_end = size == VK_WHOLE_SIZE ? mem_vk->size : offset + size; - *ppData = ptr + mem->base_offset + offset; + *ppData = ptr + offset; return VK_SUCCESS; } @@ -727,8 +518,7 @@ vn_FlushMappedMemoryRanges(VkDevice device, const VkDeviceSize size = range->size == VK_WHOLE_SIZE ? mem->map_end - range->offset : range->size; - vn_renderer_bo_flush(dev->renderer, mem->base_bo, - mem->base_offset + range->offset, size); + vn_renderer_bo_flush(dev->renderer, mem->base_bo, range->offset, size); } return VK_SUCCESS; @@ -749,8 +539,8 @@ vn_InvalidateMappedMemoryRanges(VkDevice device, const VkDeviceSize size = range->size == VK_WHOLE_SIZE ? mem->map_end - range->offset : range->size; - vn_renderer_bo_invalidate(dev->renderer, mem->base_bo, - mem->base_offset + range->offset, size); + vn_renderer_bo_invalidate(dev->renderer, mem->base_bo, range->offset, + size); } return VK_SUCCESS; @@ -762,10 +552,6 @@ vn_GetDeviceMemoryCommitment(VkDevice device, VkDeviceSize *pCommittedMemoryInBytes) { struct vn_device *dev = vn_device_from_handle(device); - ASSERTED struct vn_device_memory *mem = - vn_device_memory_from_handle(memory); - - assert(!mem->base_memory); vn_call_vkGetDeviceMemoryCommitment(dev->primary_ring, device, memory, pCommittedMemoryInBytes); } @@ -784,7 +570,7 @@ vn_GetMemoryFdKHR(VkDevice device, assert(pGetFdInfo->handleType & (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)); - assert(!mem->base_memory && mem->base_bo); + assert(mem->base_bo); *pFd = vn_renderer_bo_export_dma_buf(dev->renderer, mem->base_bo); if (*pFd < 0) return vn_error(dev->instance, VK_ERROR_TOO_MANY_OBJECTS); diff --git a/src/virtio/vulkan/vn_device_memory.h b/src/virtio/vulkan/vn_device_memory.h index 4c8800a4023..eeff571d1ca 100644 --- a/src/virtio/vulkan/vn_device_memory.h +++ b/src/virtio/vulkan/vn_device_memory.h @@ -13,17 +13,9 @@ #include "vn_common.h" -struct vn_device_memory_pool { - mtx_t mutex; - struct vn_device_memory *memory; - VkDeviceSize used; -}; - struct vn_device_memory { struct vn_device_memory_base base; - /* non-NULL when suballocated */ - struct vn_device_memory *base_memory; /* non-NULL when mappable or external */ struct vn_renderer_bo *base_bo; @@ -55,8 +47,6 @@ struct vn_device_memory { bool bo_roundtrip_seqno_valid; uint64_t bo_roundtrip_seqno; - VkDeviceSize base_offset; - VkDeviceSize map_end; }; VK_DEFINE_NONDISP_HANDLE_CASTS(vn_device_memory, @@ -64,9 +54,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(vn_device_memory, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY) -void -vn_device_memory_pool_fini(struct vn_device *dev, uint32_t mem_type_index); - VkResult vn_device_memory_import_dma_buf(struct vn_device *dev, struct vn_device_memory *mem, diff --git a/src/virtio/vulkan/vn_image.c b/src/virtio/vulkan/vn_image.c index 6584beaf38f..dde0438b281 100644 --- a/src/virtio/vulkan/vn_image.c +++ b/src/virtio/vulkan/vn_image.c @@ -809,15 +809,6 @@ vn_BindImageMemory2(VkDevice device, if (img->wsi.is_wsi) vn_image_bind_wsi_memory(img, mem); - - /* If mem is suballocated, mem->base_memory is non-NULL and we must - * patch it in. If VkBindImageMemorySwapchainInfoKHR is given, we've - * looked mem up above and also need to patch it in. - */ - if (mem->base_memory) { - info->memory = vn_device_memory_to_handle(mem->base_memory); - info->memoryOffset += mem->base_offset; - } } struct vn_device *dev = vn_device_from_handle(device); diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c index 52e098deb63..b48af030db6 100644 --- a/src/virtio/vulkan/vn_queue.c +++ b/src/virtio/vulkan/vn_queue.c @@ -516,7 +516,7 @@ vn_queue_submission_prepare(struct vn_queue_submission *submit) submit->submit_batches[0].pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA); if (info) { submit->wsi_mem = vn_device_memory_from_handle(info->memory); - assert(!submit->wsi_mem->base_memory && submit->wsi_mem->base_bo); + assert(submit->wsi_mem->base_bo); } }