From a13860e5dfd0cf28ff5292b410d5be44791ca7cc Mon Sep 17 00:00:00 2001
From: Zan Dobersek
Date: Thu, 7 Mar 2024 16:15:39 +0100
Subject: [PATCH] tu: add RMV support

Add RMV support for Turnip. The internal RMV layer is added and
integrated into the VkDevice entrypoint dispatch tables. As in other
drivers, memory tracing is activated when enabled through the
MESA_VK_TRACE environment variable.

As in other Mesa implementations of RMV support, tracing points are
added across Turnip to report the different types of RMV events,
calling into the tu_rmv logging functions to emit the relevant RMV
token data.

A TU_BO_ALLOC_INTERNAL_RESOURCE allocation flag is added. When used,
the allocation is associated in RMV output with an internal resource
of the VK_RMV_RESOURCE_TYPE_MISC_INTERNAL type.

Signed-off-by: Zan Dobersek
Part-of:
---
 src/freedreno/vulkan/layers/tu_rmv_layer.cc |  95 ++++
 src/freedreno/vulkan/meson.build            |   3 +
 src/freedreno/vulkan/tu_cs.cc               |   8 +
 src/freedreno/vulkan/tu_descriptor_set.cc   |   9 +-
 src/freedreno/vulkan/tu_device.cc           |  55 +-
 src/freedreno/vulkan/tu_image.cc            |   5 +
 src/freedreno/vulkan/tu_knl.h               |   1 +
 src/freedreno/vulkan/tu_knl_drm.cc          |   6 +-
 src/freedreno/vulkan/tu_knl_drm_msm.cc      |  15 +-
 src/freedreno/vulkan/tu_knl_kgsl.cc         |  14 +-
 src/freedreno/vulkan/tu_pipeline.cc         |  20 +-
 src/freedreno/vulkan/tu_query.cc            |   6 +
 src/freedreno/vulkan/tu_rmv.cc              | 582 ++++++++++++++++++++
 src/freedreno/vulkan/tu_rmv.h               |  84 +++
 src/freedreno/vulkan/tu_shader.cc           |   8 +-
 15 files changed, 891 insertions(+), 20 deletions(-)
 create mode 100644 src/freedreno/vulkan/layers/tu_rmv_layer.cc
 create mode 100644 src/freedreno/vulkan/tu_rmv.cc
 create mode 100644 src/freedreno/vulkan/tu_rmv.h

diff --git a/src/freedreno/vulkan/layers/tu_rmv_layer.cc b/src/freedreno/vulkan/layers/tu_rmv_layer.cc
new file mode 100644
index 00000000000..3d717f3b17d
--- /dev/null
+++ b/src/freedreno/vulkan/layers/tu_rmv_layer.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2024 Igalia S.L.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "tu_device.h"
+#include "tu_entrypoints.h"
+#include "tu_rmv.h"
+#include "vk_common_entrypoints.h"
+#include "wsi_common_entrypoints.h"
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_rmv_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
+{
+   TU_FROM_HANDLE(tu_queue, queue, _queue);
+   struct tu_device *device = queue->device;
+
+   VkResult result = wsi_QueuePresentKHR(_queue, pPresentInfo);
+   if (!(result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)
+       || !device->vk.memory_trace_data.is_enabled)
+      return result;
+
+   vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_PRESENT);
+   return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_rmv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
+                               const VkMappedMemoryRange *pMemoryRanges)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+
+   VkResult result = tu_FlushMappedMemoryRanges(_device, memoryRangeCount,
+                                                pMemoryRanges);
+   if (result != VK_SUCCESS || !device->vk.memory_trace_data.is_enabled)
+      return result;
+
+   vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_FLUSH_MAPPED_RANGE);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_rmv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
+                                    const VkMappedMemoryRange *pMemoryRanges)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+
+   VkResult result = tu_InvalidateMappedMemoryRanges(_device, memoryRangeCount,
+                                                     pMemoryRanges);
+   if (result != VK_SUCCESS || !device->vk.memory_trace_data.is_enabled)
+      return result;
+
+   vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_INVALIDATE_RANGES);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_rmv_DebugMarkerSetObjectNameEXT(VkDevice device,
+                                   const VkDebugMarkerObjectNameInfoEXT* pNameInfo)
+{
+   assert(pNameInfo->sType == VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT);
+   VkDebugUtilsObjectNameInfoEXT name_info;
+   name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
+   name_info.pNext = NULL;
+   name_info.objectType = static_cast<VkObjectType>(pNameInfo->objectType);
+   name_info.objectHandle = pNameInfo->object;
+   name_info.pObjectName = pNameInfo->pObjectName;
+   return tu_rmv_SetDebugUtilsObjectNameEXT(device, &name_info);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_rmv_SetDebugUtilsObjectNameEXT(VkDevice _device,
+                                  const VkDebugUtilsObjectNameInfoEXT* pNameInfo)
+{
+   assert(pNameInfo->sType == VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT);
+   TU_FROM_HANDLE(tu_device, device, _device);
+
+   VkResult result = vk_common_SetDebugUtilsObjectNameEXT(_device, pNameInfo);
+   if (result != VK_SUCCESS || !device->vk.memory_trace_data.is_enabled)
+      return result;
+
+   switch (pNameInfo->objectType) {
+   case VK_OBJECT_TYPE_BUFFER:
+   case VK_OBJECT_TYPE_DEVICE_MEMORY:
+   case VK_OBJECT_TYPE_IMAGE:
+   case VK_OBJECT_TYPE_EVENT:
+   case VK_OBJECT_TYPE_QUERY_POOL:
+   case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
+   case VK_OBJECT_TYPE_PIPELINE:
+      break;
+   default:
+      return VK_SUCCESS;
+   }
+
+   tu_rmv_log_resource_name(device, (const void *) pNameInfo->objectHandle,
+                            pNameInfo->pObjectName);
+   return VK_SUCCESS;
+}
diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build
index 0697492a16a..572281f7291 100644
--- a/src/freedreno/vulkan/meson.build
+++ b/src/freedreno/vulkan/meson.build
@@ -12,12 +12,14 @@ tu_entrypoints = custom_target(
     '--tmpl-prefix', 'tu',
     '--tmpl-param', 'chip CHIP',
     '--tmpl-variants', '<A6XX>', '<A7XX>',
     '--beta', with_vulkan_beta.to_string(),
+    '--device-prefix', 'tu_rmv',
   ],
   depend_files : vk_entrypoints_gen_depend_files,
)

libtu_files = files(
+
'layers/tu_rmv_layer.cc', 'tu_autotune.cc', 'tu_clear_blit.cc', 'tu_cmd_buffer.cc', @@ -34,6 +36,7 @@ libtu_files = files( 'tu_pass.cc', 'tu_pipeline.cc', 'tu_query.cc', + 'tu_rmv.cc', 'tu_shader.cc', 'tu_suballoc.cc', 'tu_util.cc', diff --git a/src/freedreno/vulkan/tu_cs.cc b/src/freedreno/vulkan/tu_cs.cc index a52109bb396..98e0b04025e 100644 --- a/src/freedreno/vulkan/tu_cs.cc +++ b/src/freedreno/vulkan/tu_cs.cc @@ -5,6 +5,8 @@ #include "tu_cs.h" +#include "tu_device.h" +#include "tu_rmv.h" #include "tu_suballoc.h" /** @@ -70,10 +72,12 @@ void tu_cs_finish(struct tu_cs *cs) { for (uint32_t i = 0; i < cs->read_only.bo_count; ++i) { + TU_RMV(resource_destroy, cs->device, cs->read_only.bos[i]); tu_bo_finish(cs->device, cs->read_only.bos[i]); } for (uint32_t i = 0; i < cs->read_write.bo_count; ++i) { + TU_RMV(resource_destroy, cs->device, cs->read_write.bos[i]); tu_bo_finish(cs->device, cs->read_write.bos[i]); } @@ -166,6 +170,8 @@ tu_cs_add_bo(struct tu_cs *cs, uint32_t size) return result; } + TU_RMV(cmd_buffer_bo_create, cs->device, new_bo); + bos->bos[bos->bo_count++] = new_bo; cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map; @@ -482,10 +488,12 @@ tu_cs_reset(struct tu_cs *cs) } for (uint32_t i = 0; i + 1 < cs->read_only.bo_count; ++i) { + TU_RMV(resource_destroy, cs->device, cs->read_only.bos[i]); tu_bo_finish(cs->device, cs->read_only.bos[i]); } for (uint32_t i = 0; i + 1 < cs->read_write.bo_count; ++i) { + TU_RMV(resource_destroy, cs->device, cs->read_write.bos[i]); tu_bo_finish(cs->device, cs->read_write.bos[i]); } diff --git a/src/freedreno/vulkan/tu_descriptor_set.cc b/src/freedreno/vulkan/tu_descriptor_set.cc index 5d106ed16d2..aa630c65d6e 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.cc +++ b/src/freedreno/vulkan/tu_descriptor_set.cc @@ -28,6 +28,7 @@ #include "tu_device.h" #include "tu_image.h" #include "tu_formats.h" +#include "tu_rmv.h" static inline uint8_t * pool_base(struct tu_descriptor_pool *pool) @@ -277,7 +278,9 @@ tu_CreateDescriptorSetLayout( if (pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) { result = tu_bo_init_new(device, &set_layout->embedded_samplers, - set_layout->size, TU_BO_ALLOC_ALLOW_DUMP, + set_layout->size, + (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP | + TU_BO_ALLOC_INTERNAL_RESOURCE), "embedded samplers"); if (result != VK_SUCCESS) { vk_object_free(&device->vk, pAllocator, set_layout); @@ -820,6 +823,8 @@ tu_CreateDescriptorPool(VkDevice _device, list_inithead(&pool->desc_sets); + TU_RMV(descriptor_pool_create, device, pCreateInfo, pool); + *pDescriptorPool = tu_descriptor_pool_to_handle(pool); return VK_SUCCESS; @@ -841,6 +846,8 @@ tu_DestroyDescriptorPool(VkDevice _device, if (!pool) return; + TU_RMV(resource_destroy, device, pool); + list_for_each_entry_safe(struct tu_descriptor_set, set, &pool->desc_sets, pool_link) { vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk); diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 841b7feec9e..124860ee75e 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -38,6 +38,7 @@ #include "tu_image.h" #include "tu_pass.h" #include "tu_query.h" +#include "tu_rmv.h" #include "tu_tracepoints.h" #include "tu_wsi.h" @@ -1619,7 +1620,7 @@ tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size) container_of(utctx, struct tu_device, trace_context); struct tu_bo *bo; - tu_bo_init_new(device, &bo, size, TU_BO_ALLOC_NO_FLAGS, "trace"); + 
tu_bo_init_new(device, &bo, size, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace"); return bo; } @@ -2137,8 +2138,16 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM"); struct vk_device_dispatch_table dispatch_table; + bool override_initial_entrypoints = true; + + if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) { + vk_device_dispatch_table_from_entrypoints( + &dispatch_table, &tu_rmv_device_entrypoints, true); + override_initial_entrypoints = false; + } + vk_device_dispatch_table_from_entrypoints( - &dispatch_table, &tu_device_entrypoints, true); + &dispatch_table, &tu_device_entrypoints, override_initial_entrypoints); switch (fd_dev_gen(&physical_device->dev_id)) { case 6: @@ -2197,6 +2206,9 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, if (TU_DEBUG(BOS)) device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal); + if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) + tu_memory_trace_init(device); + /* kgsl is not a drm device: */ if (!is_kgsl(physical_device->instance)) vk_device_set_drm_fd(&device->vk, device->fd); @@ -2278,16 +2290,24 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, tu_bo_suballocator_init( &device->pipeline_suballoc, device, 128 * 1024, - (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP), "pipeline_suballoc"); + (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY | + TU_BO_ALLOC_ALLOW_DUMP | + TU_BO_ALLOC_INTERNAL_RESOURCE), + "pipeline_suballoc"); tu_bo_suballocator_init(&device->autotune_suballoc, device, - 128 * 1024, TU_BO_ALLOC_NO_FLAGS, "autotune_suballoc"); + 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE, + "autotune_suballoc"); if (is_kgsl(physical_device->instance)) { tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device, - 128 * 1024, TU_BO_ALLOC_NO_FLAGS, "kgsl_profiling_suballoc"); + 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE, + "kgsl_profiling_suballoc"); } - result = tu_bo_init_new(device, &device->global_bo, global_size, - TU_BO_ALLOC_ALLOW_DUMP, "global"); + result = tu_bo_init_new( + device, &device->global_bo, global_size, + (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP | + TU_BO_ALLOC_INTERNAL_RESOURCE), + "global"); if (result != VK_SUCCESS) { vk_startup_errorf(device->instance, result, "BO init"); goto fail_global_bo; @@ -2488,6 +2508,7 @@ fail_dynamic_rendering: fail_empty_shaders: tu_destroy_clear_blit_shaders(device); fail_global_bo_map: + TU_RMV(resource_destroy, device, device->global_bo); tu_bo_finish(device, device->global_bo); vk_free(&device->vk.alloc, device->bo_list); fail_global_bo: @@ -2519,6 +2540,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) if (!device) return; + tu_memory_trace_finish(device); + if (FD_RD_DUMP(ENABLE)) fd_rd_output_fini(&device->rd_output); @@ -2633,7 +2656,7 @@ tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo) unsigned bo_size = 1ull << size_log2; VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size, - TU_BO_ALLOC_NO_FLAGS, "scratch"); + TU_BO_ALLOC_INTERNAL_RESOURCE, "scratch"); if (result != VK_SUCCESS) { mtx_unlock(&dev->scratch_bos[index].construct_mtx); return result; @@ -2804,6 +2827,8 @@ tu_AllocateMemory(VkDevice _device, mem->image = NULL; } + TU_RMV(heap_create, device, pAllocateInfo, mem); + *pMem = tu_device_memory_to_handle(mem); return VK_SUCCESS; @@ -2820,6 +2845,8 @@ tu_FreeMemory(VkDevice _device, if (mem == NULL) return; + 
TU_RMV(resource_destroy, device, mem); + p_atomic_add(&device->physical_device->heap.used, -mem->bo->size); tu_bo_finish(device, mem->bo); vk_object_free(&device->vk, pAllocator, mem); @@ -2934,6 +2961,8 @@ tu_BindBufferMemory2(VkDevice device, } else { buffer->bo = NULL; } + + TU_RMV(buffer_bind, dev, buffer); } return VK_SUCCESS; } @@ -2969,6 +2998,8 @@ tu_BindImageMemory2(VkDevice _device, image->map = NULL; image->iova = 0; } + + TU_RMV(image_bind, device, image); } return VK_SUCCESS; @@ -3006,6 +3037,8 @@ tu_CreateEvent(VkDevice _device, if (result != VK_SUCCESS) goto fail_map; + TU_RMV(event_create, device, pCreateInfo, event); + *pEvent = tu_event_to_handle(event); return VK_SUCCESS; @@ -3028,6 +3061,8 @@ tu_DestroyEvent(VkDevice _device, if (!event) return; + TU_RMV(resource_destroy, device, event); + tu_bo_finish(device, event->bo); vk_object_free(&device->vk, pAllocator, event); } @@ -3078,6 +3113,8 @@ tu_CreateBuffer(VkDevice _device, if (buffer == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + TU_RMV(buffer_create, device, buffer); + *pBuffer = tu_buffer_to_handle(buffer); return VK_SUCCESS; @@ -3094,6 +3131,8 @@ tu_DestroyBuffer(VkDevice _device, if (!buffer) return; + TU_RMV(buffer_destroy, device, buffer); + vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk); } diff --git a/src/freedreno/vulkan/tu_image.cc b/src/freedreno/vulkan/tu_image.cc index 8cdd2c28db1..bfa18267982 100644 --- a/src/freedreno/vulkan/tu_image.cc +++ b/src/freedreno/vulkan/tu_image.cc @@ -21,6 +21,7 @@ #include "tu_descriptor_set.h" #include "tu_device.h" #include "tu_formats.h" +#include "tu_rmv.h" uint32_t tu6_plane_count(VkFormat format) @@ -732,6 +733,8 @@ tu_CreateImage(VkDevice _device, return result; } + TU_RMV(image_create, device, image); + *pImage = tu_image_to_handle(image); #if DETECT_OS_ANDROID @@ -753,6 +756,8 @@ tu_DestroyImage(VkDevice _device, if (!image) return; + TU_RMV(image_destroy, device, image); + #if DETECT_OS_ANDROID if (image->owned_memory != VK_NULL_HANDLE) tu_FreeMemory(_device, image->owned_memory, pAllocator); diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h index e9293e3d08b..52d11be8f30 100644 --- a/src/freedreno/vulkan/tu_knl.h +++ b/src/freedreno/vulkan/tu_knl.h @@ -21,6 +21,7 @@ enum tu_bo_alloc_flags TU_BO_ALLOC_ALLOW_DUMP = 1 << 0, TU_BO_ALLOC_GPU_READ_ONLY = 1 << 1, TU_BO_ALLOC_REPLAYABLE = 1 << 2, + TU_BO_ALLOC_INTERNAL_RESOURCE = 1 << 3, }; /* Define tu_timeline_sync type based on drm syncobj for a point type diff --git a/src/freedreno/vulkan/tu_knl_drm.cc b/src/freedreno/vulkan/tu_knl_drm.cc index 9f8a5ba6eb3..1e661d52b11 100644 --- a/src/freedreno/vulkan/tu_knl_drm.cc +++ b/src/freedreno/vulkan/tu_knl_drm.cc @@ -10,6 +10,7 @@ #include "tu_knl_drm.h" #include "tu_device.h" +#include "tu_rmv.h" static inline void tu_sync_cacheline_to_gpu(void const *p __attribute__((unused))) @@ -164,9 +165,12 @@ tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo) return; } - if (bo->map) + if (bo->map) { + TU_RMV(bo_unmap, dev, bo); munmap(bo->map, bo->size); + } + TU_RMV(bo_destroy, dev, bo); tu_debug_bos_del(dev, bo); mtx_lock(&dev->bo_mutex); diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index e34bbc121d4..31ab576c06e 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -23,6 +23,7 @@ #include "tu_device.h" #include "tu_dynamic_rendering.h" #include "tu_knl_drm.h" +#include "tu_rmv.h" #include "redump.h" struct tu_queue_submit @@ 
-472,6 +473,8 @@ tu_bo_init(struct tu_device *dev, mtx_unlock(&dev->bo_mutex); + TU_RMV(bo_allocate, dev, bo); + return VK_SUCCESS; } @@ -544,10 +547,14 @@ msm_bo_init(struct tu_device *dev, VkResult result = tu_bo_init(dev, bo, req.handle, size, client_iova, flags, name); - if (result != VK_SUCCESS) - memset(bo, 0, sizeof(*bo)); - else + if (result == VK_SUCCESS) { *out_bo = bo; + if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) { + TU_RMV(internal_resource_create, dev, bo); + TU_RMV(resource_name, dev, bo, name); + } + } else + memset(bo, 0, sizeof(*bo)); /* We don't use bo->name here because for the !TU_DEBUG=bo case bo->name is NULL. */ tu_bo_set_kernel_name(dev, bo, name); @@ -640,6 +647,8 @@ msm_bo_map(struct tu_device *dev, struct tu_bo *bo) return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED); bo->map = map; + TU_RMV(bo_map, dev, bo); + return VK_SUCCESS; } diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc index 73db6bd6f34..3faee4e9ce8 100644 --- a/src/freedreno/vulkan/tu_knl_kgsl.cc +++ b/src/freedreno/vulkan/tu_knl_kgsl.cc @@ -24,6 +24,7 @@ #include "tu_cs.h" #include "tu_device.h" #include "tu_dynamic_rendering.h" +#include "tu_rmv.h" static int safe_ioctl(int fd, unsigned long request, void *arg) @@ -117,6 +118,12 @@ kgsl_bo_init(struct tu_device *dev, *out_bo = bo; + TU_RMV(bo_allocate, dev, bo); + if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) { + TU_RMV(internal_resource_create, dev, bo); + TU_RMV(resource_name, dev, bo, name); + } + return VK_SUCCESS; } @@ -190,6 +197,7 @@ kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo) return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED); bo->map = map; + TU_RMV(bo_map, dev, bo); return VK_SUCCESS; } @@ -207,8 +215,12 @@ kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo) if (!p_atomic_dec_zero(&bo->refcnt)) return; - if (bo->map) + if (bo->map) { + TU_RMV(bo_unmap, dev, bo); munmap(bo->map, bo->size); + } + + TU_RMV(bo_destroy, dev, bo); struct kgsl_gpumem_free_id req = { .id = bo->gem_handle diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 6ef11a907a4..f07e1d5b701 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -30,6 +30,7 @@ #include "tu_formats.h" #include "tu_lrz.h" #include "tu_pass.h" +#include "tu_rmv.h" /* Emit IB that preloads the descriptors that the shader uses */ @@ -1072,8 +1073,10 @@ tu_get_tess_iova(struct tu_device *dev, /* Create the shared tess factor BO the first time tess is used on the device. 
*/ if (!dev->tess_bo) { mtx_lock(&dev->mutex); - if (!dev->tess_bo) - tu_bo_init_new(dev, &dev->tess_bo, TU_TESS_BO_SIZE, TU_BO_ALLOC_NO_FLAGS, "tess"); + if (!dev->tess_bo) { + tu_bo_init_new(dev, &dev->tess_bo, TU_TESS_BO_SIZE, + TU_BO_ALLOC_INTERNAL_RESOURCE, "tess"); + } mtx_unlock(&dev->mutex); } @@ -1403,6 +1406,7 @@ tu_pipeline_allocate_cs(struct tu_device *dev, if (result != VK_SUCCESS) return result; + TU_RMV(cmd_buffer_suballoc_bo_create, dev, &pipeline->bo); tu_cs_init_suballoc(&pipeline->cs, dev, &pipeline->bo); return VK_SUCCESS; @@ -3660,6 +3664,8 @@ tu_pipeline_finish(struct tu_pipeline *pipeline, const VkAllocationCallbacks *alloc) { tu_cs_finish(&pipeline->cs); + TU_RMV(resource_destroy, dev, &pipeline->bo); + mtx_lock(&dev->pipeline_mutex); tu_suballoc_bo_free(&dev->pipeline_suballoc, &pipeline->bo); mtx_unlock(&dev->pipeline_mutex); @@ -4045,9 +4051,11 @@ tu_graphics_pipeline_create(VkDevice device, VkResult result = tu_pipeline_builder_build(&builder, &pipeline); tu_pipeline_builder_finish(&builder); - if (result == VK_SUCCESS) + if (result == VK_SUCCESS) { + TU_RMV(graphics_pipeline_create, dev, tu_pipeline_to_graphics(pipeline)); + *pPipeline = tu_pipeline_to_handle(pipeline); - else + } else *pPipeline = VK_NULL_HANDLE; return result; @@ -4230,6 +4238,8 @@ tu_compute_pipeline_create(VkDevice device, ralloc_free(pipeline_mem_ctx); + TU_RMV(compute_pipeline_create, dev, pipeline); + *pPipeline = tu_pipeline_to_handle(&pipeline->base); return VK_SUCCESS; @@ -4294,6 +4304,8 @@ tu_DestroyPipeline(VkDevice _device, if (!_pipeline) return; + TU_RMV(resource_destroy, dev, pipeline); + tu_pipeline_finish(pipeline, dev, pAllocator); vk_object_free(&dev->vk, pAllocator, pipeline); } diff --git a/src/freedreno/vulkan/tu_query.cc b/src/freedreno/vulkan/tu_query.cc index 7b24f54f471..d0cf6662b58 100644 --- a/src/freedreno/vulkan/tu_query.cc +++ b/src/freedreno/vulkan/tu_query.cc @@ -18,6 +18,7 @@ #include "tu_cmd_buffer.h" #include "tu_cs.h" #include "tu_device.h" +#include "tu_rmv.h" #include "common/freedreno_gpu_event.h" @@ -333,6 +334,9 @@ tu_CreateQueryPool(VkDevice _device, pool->stride = slot_size; pool->size = pCreateInfo->queryCount; pool->pipeline_statistics = pCreateInfo->pipelineStatistics; + + TU_RMV(query_pool_create, device, pool); + *pQueryPool = tu_query_pool_to_handle(pool); return VK_SUCCESS; @@ -349,6 +353,8 @@ tu_DestroyQueryPool(VkDevice _device, if (!pool) return; + TU_RMV(resource_destroy, device, pool); + tu_bo_finish(device, pool->bo); vk_object_free(&device->vk, pAllocator, pool); } diff --git a/src/freedreno/vulkan/tu_rmv.cc b/src/freedreno/vulkan/tu_rmv.cc new file mode 100644 index 00000000000..2c95e967eb6 --- /dev/null +++ b/src/freedreno/vulkan/tu_rmv.cc @@ -0,0 +1,582 @@ +/* + * Copyright © 2024 Igalia S.L. 
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "tu_rmv.h"
+
+#include "tu_cmd_buffer.h"
+#include "tu_cs.h"
+#include "tu_device.h"
+#include "tu_image.h"
+#include "tu_query.h"
+
+#include <inttypes.h>
+
+static VkResult
+capture_trace(VkQueue _queue)
+{
+   TU_FROM_HANDLE(tu_queue, queue, _queue);
+   struct tu_device *device = queue->device;
+   assert(device->vk.memory_trace_data.is_enabled);
+
+   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
+
+   vk_dump_rmv_capture(&device->vk.memory_trace_data);
+
+   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
+   return VK_SUCCESS;
+}
+
+static void
+tu_rmv_fill_device_info(struct tu_device *device,
+                        struct vk_rmv_device_info *info)
+{
+   struct tu_physical_device *physical_device = device->physical_device;
+
+   /* Turnip backends only set up a single device-local heap. When available,
+    * the kernel-provided VA range is used; otherwise we fall back to that
+    * heap's calculated size.
+    */
+   struct vk_rmv_memory_info *device_local_memory_info =
+      &info->memory_infos[VK_RMV_MEMORY_LOCATION_DEVICE];
+   if (physical_device->has_set_iova) {
+      *device_local_memory_info = {
+         .size = physical_device->va_size,
+         .physical_base_address = physical_device->va_start,
+      };
+   } else {
+      *device_local_memory_info = {
+         .size = physical_device->heap.size, .physical_base_address = 0,
+      };
+   }
+
+   info->memory_infos[VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE] = {
+      .size = 0, .physical_base_address = 0,
+   };
+   info->memory_infos[VK_RMV_MEMORY_LOCATION_HOST] = {
+      .size = 0, .physical_base_address = 0,
+   };
+
+   /* No PCI-e information to provide. Instead, we can include the device's
+    * chip ID in the device name string.
+    */
+   snprintf(info->device_name, sizeof(info->device_name), "%s (0x%" PRIx64 ")",
+            physical_device->name, physical_device->dev_id.chip_id);
+   info->pcie_family_id = info->pcie_revision_id = info->pcie_device_id = 0;
+
+   /* TODO: provide relevant information here.
*/ + info->vram_type = VK_RMV_MEMORY_TYPE_LPDDR5; + info->vram_operations_per_clock = info->vram_bus_width = info->vram_bandwidth = 1; + info->minimum_shader_clock = info->minimum_memory_clock = 0; + info->maximum_shader_clock = info->maximum_memory_clock = 1; +} + +void +tu_memory_trace_init(struct tu_device *device) +{ + struct vk_rmv_device_info info; + memset(&info, 0, sizeof(info)); + tu_rmv_fill_device_info(device, &info); + + vk_memory_trace_init(&device->vk, &info); + if (!device->vk.memory_trace_data.is_enabled) + return; + + device->vk.capture_trace = capture_trace; +} + +void +tu_memory_trace_finish(struct tu_device *device) +{ + vk_memory_trace_finish(&device->vk); +} + +static inline uint32_t +tu_rmv_get_resource_id_locked(struct tu_device *device, const void *resource) +{ + return vk_rmv_get_resource_id_locked(&device->vk, (uint64_t) resource); +} + +static inline void +tu_rmv_destroy_resource_id_locked(struct tu_device *device, + const void *resource) +{ + vk_rmv_destroy_resource_id_locked(&device->vk, (uint64_t) resource); +} + +static inline void +tu_rmv_emit_resource_bind_locked(struct tu_device *device, uint32_t resource_id, + uint64_t address, uint64_t size) +{ + struct vk_rmv_resource_bind_token token = { + .address = address, + .size = size, + .is_system_memory = false, + .resource_id = resource_id, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &token); +} + +static inline void +tu_rmv_emit_cpu_map_locked(struct tu_device *device, uint64_t address, + bool unmapped) +{ + struct vk_rmv_cpu_map_token token = { + .address = address, + .unmapped = unmapped, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_CPU_MAP, &token); +} + +static inline void +tu_rmv_emit_page_table_update_locked(struct tu_device *device, struct tu_bo *bo, + bool is_unmap) +{ + /* These tokens are mainly useful for RMV to properly associate buffer + * allocations and deallocations to a specific memory domain. + */ + struct vk_rmv_page_table_update_token token = { + .virtual_address = bo->iova, + .physical_address = bo->iova, + .page_count = DIV_ROUND_UP(bo->size, 4096), + .page_size = 4096, + .pid = 0, + .is_unmap = is_unmap, + .type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE, &token); +} + +void +tu_rmv_log_heap_create(struct tu_device *device, + const VkMemoryAllocateInfo *allocate_info, + struct tu_device_memory *device_memory) +{ + const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const( + allocate_info->pNext, MEMORY_ALLOCATE_FLAGS_INFO); + + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, device_memory), + .is_driver_internal = false, + .type = VK_RMV_RESOURCE_TYPE_HEAP, + .heap = { + .alloc_flags = flags_info ? 
flags_info->flags : 0, + .size = device_memory->bo->size, + .alignment = 4096, + .heap_index = VK_RMV_MEMORY_LOCATION_DEVICE, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + tu_rmv_emit_resource_bind_locked(device, token.resource_id, + device_memory->bo->iova, + device_memory->bo->size); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_bo_allocate(struct tu_device *device, struct tu_bo *bo) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + tu_rmv_emit_page_table_update_locked(device, bo, false); + + struct vk_rmv_virtual_allocate_token virtual_allocate_token = { + .page_count = DIV_ROUND_UP(bo->size, 4096), + .is_driver_internal = false, + .is_in_invisible_vram = false, + .address = bo->iova, + .preferred_domains = VK_RMV_KERNEL_MEMORY_DOMAIN_VRAM, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE, + &virtual_allocate_token); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_bo_destroy(struct tu_device *device, struct tu_bo *bo) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_virtual_free_token virtual_free_token = { + .address = bo->iova, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_VIRTUAL_FREE, &virtual_free_token); + + tu_rmv_emit_page_table_update_locked(device, bo, true); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_bo_map(struct tu_device *device, struct tu_bo *bo) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + tu_rmv_emit_cpu_map_locked(device, bo->iova, false); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_bo_unmap(struct tu_device *device, struct tu_bo *bo) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + tu_rmv_emit_cpu_map_locked(device, bo->iova, true); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_buffer_create(struct tu_device *device, struct tu_buffer *buffer) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, buffer), + .is_driver_internal = false, + .type = VK_RMV_RESOURCE_TYPE_BUFFER, + .buffer = { + .create_flags = buffer->vk.create_flags, + .usage_flags = buffer->vk.usage, + .size = buffer->vk.size, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + /* Any sparse data would also be reported here, if supported. */ + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_buffer_destroy(struct tu_device *device, struct tu_buffer *buffer) +{ + /* Any sparse data would also be reported here, if supported. */ + tu_rmv_log_resource_destroy(device, buffer); +} + +void +tu_rmv_log_buffer_bind(struct tu_device *device, struct tu_buffer *buffer) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + tu_rmv_emit_resource_bind_locked(device, + tu_rmv_get_resource_id_locked(device, buffer), + buffer->bo ? 
buffer->iova : 0, + buffer->vk.size); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_image_create(struct tu_device *device, struct tu_image *image) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + /* TODO: provide the image metadata information */ + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, image), + .is_driver_internal = false, + .type = VK_RMV_RESOURCE_TYPE_IMAGE, + .image = { + .create_flags = image->vk.create_flags, + .usage_flags = image->vk.usage, + .type = image->vk.image_type, + .extent = image->vk.extent, + .format = image->vk.format, + .num_mips = image->vk.mip_levels, + .num_slices = image->vk.array_layers, + .tiling = image->vk.tiling, + .log2_samples = util_logbase2(image->vk.samples), + .log2_storage_samples = util_logbase2(image->vk.samples), + /* any bound memory should have alignment of 4096 */ + .alignment_log2 = util_logbase2(4096), + .metadata_alignment_log2 = 0, + .image_alignment_log2 = util_logbase2(image->layout[0].base_align), + .size = image->total_size, + .metadata_size = 0, + .metadata_header_size = 0, + .metadata_offset = 0, + .metadata_header_offset = 0, + /* TODO: find a better way to determine if an image is presentable */ + .presentable = image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + /* Any sparse data would also be reported here, if supported. */ + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_image_destroy(struct tu_device *device, struct tu_image *image) +{ + /* Any sparse data would also be reported here, if supported. */ + tu_rmv_log_resource_destroy(device, image); +} + +void +tu_rmv_log_image_bind(struct tu_device *device, struct tu_image *image) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + uint64_t address = image->bo ? image->iova : 0; + uint64_t size = image->bo ? 
image->total_size : 0; + tu_rmv_emit_resource_bind_locked(device, + tu_rmv_get_resource_id_locked(device, image), + address, size); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +static inline void +tu_rmv_log_command_allocator_create(struct tu_device *device, void *bo, + uint64_t address, uint64_t size) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, bo), + .is_driver_internal = true, + .type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR, + .command_buffer = { + .preferred_domain = VK_RMV_KERNEL_MEMORY_DOMAIN_VRAM, + .executable_size = size, + .app_available_executable_size = size, + .embedded_data_size = 0, + .app_available_embedded_data_size = 0, + .scratch_size = 0, + .app_available_scratch_size = 0, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + tu_rmv_emit_resource_bind_locked(device, token.resource_id, address, size); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_cmd_buffer_bo_create(struct tu_device *device, + struct tu_bo *bo) +{ + tu_rmv_log_command_allocator_create(device, bo, bo->iova, bo->size); +} + +void +tu_rmv_log_cmd_buffer_suballoc_bo_create(struct tu_device *device, + struct tu_suballoc_bo *suballoc_bo) +{ + tu_rmv_log_command_allocator_create(device, suballoc_bo, + suballoc_bo->iova, suballoc_bo->size); +} + +void +tu_rmv_log_query_pool_create(struct tu_device *device, + struct tu_query_pool *query_pool) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, query_pool), + .is_driver_internal = false, + .type = VK_RMV_RESOURCE_TYPE_QUERY_HEAP, + .query_pool = { + .type = query_pool->type, + .has_cpu_access = true, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + tu_rmv_emit_resource_bind_locked(device, token.resource_id, + query_pool->bo->iova, query_pool->bo->size); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_descriptor_pool_create(struct tu_device *device, + const VkDescriptorPoolCreateInfo *create_info, + struct tu_descriptor_pool *descriptor_pool) +{ + size_t pool_sizes_size = + create_info->poolSizeCount * sizeof(VkDescriptorPoolSize); + VkDescriptorPoolSize *pool_sizes = + (VkDescriptorPoolSize *) malloc(pool_sizes_size); + if (!pool_sizes) + return; + + memcpy(pool_sizes, create_info->pPoolSizes, pool_sizes_size); + + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, descriptor_pool), + .is_driver_internal = false, + .type = VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL, + .descriptor_pool = { + .max_sets = create_info->maxSets, + .pool_size_count = create_info->poolSizeCount, + .pool_sizes = pool_sizes, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + if (descriptor_pool->bo) { + tu_rmv_emit_resource_bind_locked(device, token.resource_id, + descriptor_pool->bo->iova, + descriptor_pool->bo->size); + } + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +static inline void +tu_rmv_log_pipeline_create(struct tu_device *device, + struct tu_pipeline *pipeline) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct 
vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, pipeline), + .is_driver_internal = false, + .type = VK_RMV_RESOURCE_TYPE_PIPELINE, + .pipeline = { + .is_internal = false, + /* TODO: provide pipeline hash data when available. */ + .hash_lo = 0, .hash_hi = 0, + .shader_stages = pipeline->active_stages, + .is_ngg = false, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + if (pipeline->bo.bo) { + tu_rmv_emit_resource_bind_locked(device, token.resource_id, + pipeline->bo.iova, pipeline->bo.size); + } + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_graphics_pipeline_create(struct tu_device *device, + struct tu_graphics_pipeline *graphics_pipeline) +{ + tu_rmv_log_pipeline_create(device, &graphics_pipeline->base); +} + +void +tu_rmv_log_compute_pipeline_create(struct tu_device *device, + struct tu_compute_pipeline *compute_pipeline) +{ + tu_rmv_log_pipeline_create(device, &compute_pipeline->base); +} + +void +tu_rmv_log_event_create(struct tu_device *device, + const VkEventCreateInfo *create_info, + struct tu_event *event) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, event), + .is_driver_internal = false, + .type = VK_RMV_RESOURCE_TYPE_GPU_EVENT, + .event = { + .flags = create_info->flags, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + if (event->bo) { + tu_rmv_emit_resource_bind_locked(device, token.resource_id, + event->bo->iova, event->bo->size); + } + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_internal_resource_create(struct tu_device *device, struct tu_bo *bo) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_create_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, bo), + .is_driver_internal = true, + .type = VK_RMV_RESOURCE_TYPE_MISC_INTERNAL, + .misc_internal = { + .type = VK_RMV_MISC_INTERNAL_TYPE_PADDING, + }, + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); + + tu_rmv_emit_resource_bind_locked(device, token.resource_id, + bo->iova, bo->size); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_resource_name(struct tu_device *device, const void *resource, + const char *resource_name) +{ + size_t name_len = MIN2(strlen(resource_name) + 1, 128); + char *name_buf = (char *) malloc(name_len); + if (!name_buf) + return; + + strncpy(name_buf, resource_name, name_len); + name_buf[name_len - 1] = '\0'; + + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_userdata_token token = { + .name = name_buf, + .resource_id = tu_rmv_get_resource_id_locked(device, resource) + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_USERDATA, &token); + + simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + +void +tu_rmv_log_resource_destroy(struct tu_device *device, const void *resource) +{ + simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); + + struct vk_rmv_resource_destroy_token token = { + .resource_id = tu_rmv_get_resource_id_locked(device, resource), + }; + vk_rmv_emit_token(&device->vk.memory_trace_data, + VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token); + + tu_rmv_destroy_resource_id_locked(device, resource); + 
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); +} + diff --git a/src/freedreno/vulkan/tu_rmv.h b/src/freedreno/vulkan/tu_rmv.h new file mode 100644 index 00000000000..2dbe36a5fde --- /dev/null +++ b/src/freedreno/vulkan/tu_rmv.h @@ -0,0 +1,84 @@ +/* + * Copyright © 2024 Igalia S.L. + * SPDX-License-Identifier: MIT + */ + +#ifndef TU_RMV_H +#define TU_RMV_H + +#include "tu_common.h" + +#include "rmv/vk_rmv_common.h" + +#define TU_RMV(func, device, ...) do { \ + if (unlikely((device)->vk.memory_trace_data.is_enabled)) \ + tu_rmv_log_##func(device, __VA_ARGS__); \ + } while(0) + +void +tu_memory_trace_init(struct tu_device *device); + +void +tu_memory_trace_finish(struct tu_device *device); + +void +tu_rmv_log_heap_create(struct tu_device *device, + const VkMemoryAllocateInfo *allocate_info, + struct tu_device_memory *device_memory); + +void +tu_rmv_log_bo_allocate(struct tu_device *device, struct tu_bo *bo); +void +tu_rmv_log_bo_destroy(struct tu_device *device, struct tu_bo *bo); +void +tu_rmv_log_bo_map(struct tu_device *device, struct tu_bo *bo); +void +tu_rmv_log_bo_unmap(struct tu_device *device, struct tu_bo *bo); + +void +tu_rmv_log_buffer_create(struct tu_device *device, struct tu_buffer *buffer); +void +tu_rmv_log_buffer_destroy(struct tu_device *device, struct tu_buffer *buffer); +void +tu_rmv_log_buffer_bind(struct tu_device *device, struct tu_buffer *buffer); + +void +tu_rmv_log_image_create(struct tu_device *device, struct tu_image *image); +void +tu_rmv_log_image_destroy(struct tu_device *device, struct tu_image *image); +void +tu_rmv_log_image_bind(struct tu_device *device, struct tu_image *image); + +void +tu_rmv_log_cmd_buffer_bo_create(struct tu_device *device, + struct tu_bo *bo); +void +tu_rmv_log_cmd_buffer_suballoc_bo_create(struct tu_device *device, + struct tu_suballoc_bo *suballoc_bo); +void +tu_rmv_log_query_pool_create(struct tu_device *device, + struct tu_query_pool *query_pool); +void +tu_rmv_log_descriptor_pool_create(struct tu_device *device, + const VkDescriptorPoolCreateInfo *create_info, + struct tu_descriptor_pool *descriptor_pool); +void +tu_rmv_log_graphics_pipeline_create(struct tu_device *device, + struct tu_graphics_pipeline *graphics_pipeline); +void +tu_rmv_log_compute_pipeline_create(struct tu_device *device, + struct tu_compute_pipeline *compute_pipeline); +void +tu_rmv_log_event_create(struct tu_device *device, + const VkEventCreateInfo *create_info, + struct tu_event *event); + +void +tu_rmv_log_internal_resource_create(struct tu_device *device, struct tu_bo *bo); +void +tu_rmv_log_resource_name(struct tu_device *device, const void *resource, + const char *resource_name); +void +tu_rmv_log_resource_destroy(struct tu_device *device, const void *resource); + +#endif /* TU_RMV_H */ diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 7abf5c2a343..608420e47eb 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -18,8 +18,9 @@ #include "tu_device.h" #include "tu_descriptor_set.h" -#include "tu_pipeline.h" #include "tu_lrz.h" +#include "tu_pipeline.h" +#include "tu_rmv.h" #include @@ -2078,7 +2079,7 @@ tu_setup_pvtmem(struct tu_device *dev, dev->physical_device->info->num_sp_cores * pvtmem_bo->per_sp_size; VkResult result = tu_bo_init_new(dev, &pvtmem_bo->bo, total_size, - TU_BO_ALLOC_NO_FLAGS, "pvtmem"); + TU_BO_ALLOC_INTERNAL_RESOURCE, "pvtmem"); if (result != VK_SUCCESS) { mtx_unlock(&pvtmem_bo->mtx); return result; @@ -2190,6 +2191,7 @@ tu_upload_shader(struct 
tu_device *dev, return result; } + TU_RMV(cmd_buffer_suballoc_bo_create, dev, &shader->bo); tu_cs_init_suballoc(&shader->cs, dev, &shader->bo); uint64_t iova = tu_upload_variant(&shader->cs, v); @@ -2886,6 +2888,7 @@ tu_empty_shader_create(struct tu_device *dev, return result; } + TU_RMV(cmd_buffer_suballoc_bo_create, dev, &shader->bo); tu_cs_init_suballoc(&shader->cs, dev, &shader->bo); struct tu_pvtmem_config pvtmem_config = { }; @@ -2987,6 +2990,7 @@ tu_shader_destroy(struct tu_device *dev, struct tu_shader *shader) { tu_cs_finish(&shader->cs); + TU_RMV(resource_destroy, dev, &shader->bo); pthread_mutex_lock(&dev->pipeline_mutex); tu_suballoc_bo_free(&dev->pipeline_suballoc, &shader->bo);
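
Note on the tracing hooks: the TU_RMV macro defined in tu_rmv.h above is the
single gating point for every logging call added by this patch. As a sketch
(illustrative only; `dev` and `bo` stand in for whatever variables exist at a
real call site), a statement such as

    TU_RMV(bo_map, dev, bo);

expands to roughly

    /* Token-pasted from the macro argument: bo_map -> tu_rmv_log_bo_map. */
    do {
       if (unlikely((dev)->vk.memory_trace_data.is_enabled))
          tu_rmv_log_bo_map(dev, bo);
    } while (0);

so each added call site reduces to a single predicted-false branch whenever
the rmv trace mode has not been requested through MESA_VK_TRACE.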