tu: Enable bufferDeviceAddressCaptureReplay

Now that we are able to set the IOVA from userspace, we can support
the capture/replay feature of BDA.

To prevent address clashes between ordinary allocations and
replayable ones, we allocate:
- ordinary allocations from the lowest addresses;
- replayable allocations from the highest addresses.

Passes: dEQP-VK.binding_model.buffer_device_address.*replay*

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15676>
Author: Danylo Piliaiev
Date: 2022-08-18 12:47:04 +03:00
parent e23c4fbd9b
commit c20139c292
4 changed files with 83 additions and 18 deletions
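For context, capture/replay is driven entirely from the application side: memory is allocated with VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT, its opaque address is queried during capture, and the same address is handed back through the pNext chain on replay. The sketch below shows that flow; it is not part of the commit, the helper names are made up, and error handling is omitted.

#include <vulkan/vulkan.h>

/* Capture side: allocate replayable memory and record its opaque address.
 * 'device', 'size' and 'memory_type_index' are assumed to come from the
 * caller. */
static uint64_t
allocate_and_capture(VkDevice device, VkDeviceSize size,
                     uint32_t memory_type_index, VkDeviceMemory *mem)
{
   VkMemoryAllocateFlagsInfo flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
               VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT,
   };
   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &flags_info,
      .allocationSize = size,
      .memoryTypeIndex = memory_type_index,
   };
   vkAllocateMemory(device, &alloc_info, NULL, mem);

   VkDeviceMemoryOpaqueCaptureAddressInfo addr_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO,
      .memory = *mem,
   };
   /* With this commit, turnip returns the BO's IOVA here. */
   return vkGetDeviceMemoryOpaqueCaptureAddress(device, &addr_info);
}

/* Replay side: request the captured address back through the pNext chain
 * that tu_AllocateMemory now parses. */
static VkResult
allocate_for_replay(VkDevice device, VkDeviceSize size,
                    uint32_t memory_type_index, uint64_t captured_address,
                    VkDeviceMemory *mem)
{
   VkMemoryAllocateFlagsInfo flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
               VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT,
   };
   VkMemoryOpaqueCaptureAddressAllocateInfo replay_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,
      .pNext = &flags_info,
      .opaqueCaptureAddress = captured_address,
   };
   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &replay_info,
      .allocationSize = size,
      .memoryTypeIndex = memory_type_index,
   };
   return vkAllocateMemory(device, &alloc_info, NULL, mem);
}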


@@ -618,7 +618,7 @@ tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice,
    features->hostQueryReset = true;
    features->timelineSemaphore = true;
    features->bufferDeviceAddress = true;
-   features->bufferDeviceAddressCaptureReplay = false;
+   features->bufferDeviceAddressCaptureReplay = pdevice->has_set_iova;
    features->bufferDeviceAddressMultiDevice = false;
    features->vulkanMemoryModel = true;
    features->vulkanMemoryModelDeviceScope = true;
@@ -2339,9 +2339,28 @@ tu_AllocateMemory(VkDevice _device,
             close(fd_info->fd);
       }
    } else {
-      result =
-         tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize,
-                        TU_BO_ALLOC_NO_FLAGS);
+      uint64_t client_address = 0;
+      enum tu_bo_alloc_flags alloc_flags = TU_BO_ALLOC_NO_FLAGS;
+
+      const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
+         vk_find_struct_const(pAllocateInfo->pNext,
+                              MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
+      if (replay_info && replay_info->opaqueCaptureAddress) {
+         client_address = replay_info->opaqueCaptureAddress;
+         alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
+      }
+
+      const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
+         pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
+      if (flags_info &&
+          (flags_info->flags &
+           VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
+         alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
+      }
+
+      result = tu_bo_init_new_explicit_iova(device, &mem->bo,
+                                            pAllocateInfo->allocationSize,
+                                            client_address, alloc_flags);
    }
@@ -3022,7 +3041,7 @@ uint64_t tu_GetBufferOpaqueCaptureAddress(
    VkDevice device,
    const VkBufferDeviceAddressInfo* pInfo)
 {
-   tu_stub();
+   /* We care only about memory allocation opaque addresses */
    return 0;
 }
@@ -3030,6 +3049,6 @@ uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
    VkDevice device,
    const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
 {
-   tu_stub();
-   return 0;
+   TU_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
+   return mem->bo->iova;
 }


@@ -216,12 +216,35 @@ static VkResult
 tu_allocate_userspace_iova(struct tu_device *dev,
                            uint32_t gem_handle,
                            uint64_t size,
+                           uint64_t client_iova,
+                           enum tu_bo_alloc_flags flags,
                            uint64_t *iova)
 {
    mtx_lock(&dev->physical_device->vma_mutex);
-   dev->physical_device->vma.alloc_high = false;
-   *iova = util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000);
+
+   *iova = 0;
+
+   if (flags & TU_BO_ALLOC_REPLAYABLE) {
+      if (client_iova) {
+         if (util_vma_heap_alloc_addr(&dev->physical_device->vma, client_iova,
+                                      size)) {
+            *iova = client_iova;
+         } else {
+            return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
+         }
+      } else {
+         /* We have to keep replayable IOVAs separate from ordinary ones so
+          * that they never clash. The easiest way to do this is to allocate
+          * them from the other end of the address space.
+          */
+         dev->physical_device->vma.alloc_high = true;
+         *iova =
+            util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000);
+      }
+   } else {
+      dev->physical_device->vma.alloc_high = false;
+      *iova = util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000);
+   }
+
    mtx_unlock(&dev->physical_device->vma_mutex);
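The low/high split above is all that keeps the two kinds of allocations apart in the single VMA heap. Below is a standalone illustration of the idea, assuming Mesa's util_vma_heap API (util_vma_heap_init/util_vma_heap_alloc and its alloc_high switch); the address range and sizes are made up, and this is not driver code.

#include <inttypes.h>
#include <stdio.h>
#include "util/vma.h"

int main(void)
{
   struct util_vma_heap vma;
   /* Made-up 4 GiB GPU VA window starting at 0x100000. */
   util_vma_heap_init(&vma, 0x100000, 4ull << 30);

   /* Ordinary BOs: carve from the bottom of the range. */
   vma.alloc_high = false;
   uint64_t ordinary = util_vma_heap_alloc(&vma, 0x10000, 0x1000);

   /* Replayable BOs: carve from the top, so their addresses cannot
    * collide with ordinary ones unless the heap is exhausted. */
   vma.alloc_high = true;
   uint64_t replayable = util_vma_heap_alloc(&vma, 0x10000, 0x1000);

   printf("ordinary:   0x%" PRIx64 "\n", ordinary);
   printf("replayable: 0x%" PRIx64 "\n", replayable);

   util_vma_heap_finish(&vma);
   return 0;
}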
@@ -259,13 +282,17 @@ tu_bo_init(struct tu_device *dev,
            struct tu_bo *bo,
            uint32_t gem_handle,
            uint64_t size,
+           uint64_t client_iova,
            enum tu_bo_alloc_flags flags)
 {
    VkResult result = VK_SUCCESS;
    uint64_t iova = 0;
 
+   assert(!client_iova || dev->physical_device->has_set_iova);
+
    if (dev->physical_device->has_set_iova) {
-      result = tu_allocate_userspace_iova(dev, gem_handle, size, &iova);
+      result = tu_allocate_userspace_iova(dev, gem_handle, size, client_iova,
+                                          flags, &iova);
    } else {
       result = tu_allocate_kernel_iova(dev, gem_handle, &iova);
    }
@@ -317,8 +344,11 @@ fail_bo_list:
 }
 
 VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
-               enum tu_bo_alloc_flags flags)
+tu_bo_init_new_explicit_iova(struct tu_device *dev,
+                             struct tu_bo **out_bo,
+                             uint64_t size,
+                             uint64_t client_iova,
+                             enum tu_bo_alloc_flags flags)
 {
    /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
     * always sets `flags = MSM_BO_WC`, and we copy that behavior here.
@@ -340,7 +370,7 @@ tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
    assert(bo && bo->gem_handle == 0);
 
    VkResult result =
-      tu_bo_init(dev, bo, req.handle, size, flags);
+      tu_bo_init(dev, bo, req.handle, size, client_iova, flags);
 
    if (result != VK_SUCCESS)
       memset(bo, 0, sizeof(*bo));
@@ -389,7 +419,7 @@ tu_bo_init_dmabuf(struct tu_device *dev,
    }
 
    VkResult result =
-      tu_bo_init(dev, bo, gem_handle, size, TU_BO_ALLOC_NO_FLAGS);
+      tu_bo_init(dev, bo, gem_handle, size, 0, TU_BO_ALLOC_NO_FLAGS);
 
    if (result != VK_SUCCESS)
       memset(bo, 0, sizeof(*bo));


@@ -26,6 +26,7 @@ enum tu_bo_alloc_flags
    TU_BO_ALLOC_NO_FLAGS = 0,
    TU_BO_ALLOC_ALLOW_DUMP = 1 << 0,
    TU_BO_ALLOC_GPU_READ_ONLY = 1 << 1,
+   TU_BO_ALLOC_REPLAYABLE = 1 << 2,
 };
 
 /* Define tu_timeline_sync type based on drm syncobj for a point type
@@ -67,8 +68,18 @@ struct tu_timeline_sync {
 };
 
 VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo **bo, uint64_t size,
-               enum tu_bo_alloc_flags flags);
+tu_bo_init_new_explicit_iova(struct tu_device *dev,
+                             struct tu_bo **out_bo,
+                             uint64_t size,
+                             uint64_t client_iova,
+                             enum tu_bo_alloc_flags flags);
+
+static inline VkResult
+tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
+               enum tu_bo_alloc_flags flags)
+{
+   return tu_bo_init_new_explicit_iova(dev, out_bo, size, 0, flags);
+}
 
 VkResult
 tu_bo_init_dmabuf(struct tu_device *dev,


@@ -69,9 +69,14 @@ tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id)
 }
 
 VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
-               enum tu_bo_alloc_flags flags)
+tu_bo_init_new_explicit_iova(struct tu_device *dev,
+                             struct tu_bo **out_bo,
+                             uint64_t size,
+                             uint64_t client_iova,
+                             enum tu_bo_alloc_flags flags)
 {
+   assert(client_iova == 0);
+
    struct kgsl_gpumem_alloc_id req = {
       .size = size,
    };