From c20139c2922276238c12dc6a6cf0fbbc39e9d732 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Thu, 18 Aug 2022 12:47:04 +0300 Subject: [PATCH] tu: Enable bufferDeviceAddressCaptureReplay Now that we are able to set iova from userspace we could support the replay feature of BDA. In order to prevent address clashing between ordinary allocations and replayable ones we allocate: - ordinary allocations - from the lowest address; - replayable allocations - from the highest address. Passes: dEQP-VK.binding_model.buffer_device_address.*replay* Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/tu_device.c | 33 +++++++++++++++++++----- src/freedreno/vulkan/tu_drm.c | 44 +++++++++++++++++++++++++++----- src/freedreno/vulkan/tu_drm.h | 15 +++++++++-- src/freedreno/vulkan/tu_kgsl.c | 9 +++++-- 4 files changed, 83 insertions(+), 18 deletions(-) diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 415e0253952..f1ac184a58d 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -618,7 +618,7 @@ tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice, features->hostQueryReset = true; features->timelineSemaphore = true; features->bufferDeviceAddress = true; - features->bufferDeviceAddressCaptureReplay = false; + features->bufferDeviceAddressCaptureReplay = pdevice->has_set_iova; features->bufferDeviceAddressMultiDevice = false; features->vulkanMemoryModel = true; features->vulkanMemoryModelDeviceScope = true; @@ -2339,9 +2339,28 @@ tu_AllocateMemory(VkDevice _device, close(fd_info->fd); } } else { - result = - tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize, - TU_BO_ALLOC_NO_FLAGS); + uint64_t client_address = 0; + enum tu_bo_alloc_flags alloc_flags = TU_BO_ALLOC_NO_FLAGS; + + const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info = + vk_find_struct_const(pAllocateInfo->pNext, + MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO); + if (replay_info && 
replay_info->opaqueCaptureAddress) { client_address = replay_info->opaqueCaptureAddress; alloc_flags |= TU_BO_ALLOC_REPLAYABLE; } + const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const( pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO); if (flags_info && (flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) { alloc_flags |= TU_BO_ALLOC_REPLAYABLE; } + result = tu_bo_init_new_explicit_iova(device, &mem->bo, pAllocateInfo->allocationSize, client_address, alloc_flags); } @@ -3022,7 +3041,7 @@ uint64_t tu_GetBufferOpaqueCaptureAddress( VkDevice device, const VkBufferDeviceAddressInfo* pInfo) { - tu_stub(); + /* We care only about memory allocation opaque addresses */ return 0; } @@ -3030,6 +3049,6 @@ uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress( VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo) { - tu_stub(); - return 0; + TU_FROM_HANDLE(tu_device_memory, mem, pInfo->memory); + return mem->bo->iova; } diff --git a/src/freedreno/vulkan/tu_drm.c b/src/freedreno/vulkan/tu_drm.c index 94936978e93..f4667ca8dfe 100644 --- a/src/freedreno/vulkan/tu_drm.c +++ b/src/freedreno/vulkan/tu_drm.c @@ -216,12 +216,35 @@ static VkResult tu_allocate_userspace_iova(struct tu_device *dev, uint32_t gem_handle, uint64_t size, + uint64_t client_iova, + enum tu_bo_alloc_flags flags, uint64_t *iova) { mtx_lock(&dev->physical_device->vma_mutex); - dev->physical_device->vma.alloc_high = false; - *iova = util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000); + *iova = 0; + + if (flags & TU_BO_ALLOC_REPLAYABLE) { + if (client_iova) { + if (util_vma_heap_alloc_addr(&dev->physical_device->vma, client_iova, size)) { + *iova = client_iova; + } else { + return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS; + } + } else { + /* We have to separate replayable IOVAs from ordinary ones in order * for them not to clash. The easiest way to do this is to allocate * them from the other end of the address space. 
+ */ + dev->physical_device->vma.alloc_high = true; + *iova = + util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000); + } + } else { + dev->physical_device->vma.alloc_high = false; + *iova = util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000); + } mtx_unlock(&dev->physical_device->vma_mutex); @@ -259,13 +282,17 @@ tu_bo_init(struct tu_device *dev, struct tu_bo *bo, uint32_t gem_handle, uint64_t size, + uint64_t client_iova, enum tu_bo_alloc_flags flags) { VkResult result = VK_SUCCESS; uint64_t iova = 0; + assert(!client_iova || dev->physical_device->has_set_iova); + if (dev->physical_device->has_set_iova) { - result = tu_allocate_userspace_iova(dev, gem_handle, size, &iova); + result = tu_allocate_userspace_iova(dev, gem_handle, size, client_iova, + flags, &iova); } else { result = tu_allocate_kernel_iova(dev, gem_handle, &iova); } @@ -317,8 +344,11 @@ fail_bo_list: } VkResult -tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size, - enum tu_bo_alloc_flags flags) +tu_bo_init_new_explicit_iova(struct tu_device *dev, + struct tu_bo **out_bo, + uint64_t size, + uint64_t client_iova, + enum tu_bo_alloc_flags flags) { /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c * always sets `flags = MSM_BO_WC`, and we copy that behavior here. 
@@ -340,7 +370,7 @@ tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size, assert(bo && bo->gem_handle == 0); VkResult result = - tu_bo_init(dev, bo, req.handle, size, flags); + tu_bo_init(dev, bo, req.handle, size, client_iova, flags); if (result != VK_SUCCESS) memset(bo, 0, sizeof(*bo)); @@ -389,7 +419,7 @@ tu_bo_init_dmabuf(struct tu_device *dev, } VkResult result = - tu_bo_init(dev, bo, gem_handle, size, TU_BO_ALLOC_NO_FLAGS); + tu_bo_init(dev, bo, gem_handle, size, 0, TU_BO_ALLOC_NO_FLAGS); if (result != VK_SUCCESS) memset(bo, 0, sizeof(*bo)); diff --git a/src/freedreno/vulkan/tu_drm.h b/src/freedreno/vulkan/tu_drm.h index 1fc057a82c2..b27dbbb3478 100644 --- a/src/freedreno/vulkan/tu_drm.h +++ b/src/freedreno/vulkan/tu_drm.h @@ -26,6 +26,7 @@ enum tu_bo_alloc_flags TU_BO_ALLOC_NO_FLAGS = 0, TU_BO_ALLOC_ALLOW_DUMP = 1 << 0, TU_BO_ALLOC_GPU_READ_ONLY = 1 << 1, + TU_BO_ALLOC_REPLAYABLE = 1 << 2, }; /* Define tu_timeline_sync type based on drm syncobj for a point type @@ -67,8 +68,18 @@ struct tu_timeline_sync { }; VkResult -tu_bo_init_new(struct tu_device *dev, struct tu_bo **bo, uint64_t size, - enum tu_bo_alloc_flags flags); +tu_bo_init_new_explicit_iova(struct tu_device *dev, + struct tu_bo **out_bo, + uint64_t size, + uint64_t client_iova, + enum tu_bo_alloc_flags flags); + +static inline VkResult +tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size, + enum tu_bo_alloc_flags flags) +{ + return tu_bo_init_new_explicit_iova(dev, out_bo, size, 0, flags); +} VkResult tu_bo_init_dmabuf(struct tu_device *dev, diff --git a/src/freedreno/vulkan/tu_kgsl.c b/src/freedreno/vulkan/tu_kgsl.c index b8d6350a25b..3f5dca645e7 100644 --- a/src/freedreno/vulkan/tu_kgsl.c +++ b/src/freedreno/vulkan/tu_kgsl.c @@ -69,9 +69,14 @@ tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id) } VkResult -tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size, - enum tu_bo_alloc_flags flags) 
+tu_bo_init_new_explicit_iova(struct tu_device *dev, + struct tu_bo **out_bo, + uint64_t size, + uint64_t client_iova, + enum tu_bo_alloc_flags flags) { + assert(client_iova == 0); + struct kgsl_gpumem_alloc_id req = { .size = size, };