radv/amdgpu: remove the virtual BOs tracking logic

All BOs allocated by applications are local BOs, which the kernel keeps
valid without per-submission BO lists, so the logic that tracks the backing
BOs of virtual (sparse) BOs can be removed completely.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40466>
Samuel Pitoiset 2026-03-17 14:31:48 +01:00 committed by Marge Bot
parent 2cf84eedb9
commit fb195bd6bd
3 changed files with 6 additions and 304 deletions


@@ -80,52 +80,6 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
return r;
}
static int
bo_comparator(const void *ap, const void *bp)
{
struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
return (a > b) ? 1 : (a < b) ? -1 : 0;
}
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
u_rwlock_wrlock(&bo->lock);
if (bo->bo_capacity < bo->range_count) {
uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
if (!bos) {
u_rwlock_wrunlock(&bo->lock);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
bo->bos = bos;
bo->bo_capacity = new_count;
}
uint32_t temp_bo_count = 0;
for (uint32_t i = 0; i < bo->range_count; ++i)
if (bo->ranges[i].bo)
bo->bos[temp_bo_count++] = bo->ranges[i].bo;
qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
if (!temp_bo_count) {
bo->bo_count = 0;
} else {
uint32_t final_bo_count = 1;
for (uint32_t i = 1; i < temp_bo_count; ++i)
if (bo->bos[i] != bo->bos[i - 1])
bo->bos[final_bo_count++] = bo->bos[i];
bo->bo_count = final_bo_count;
}
u_rwlock_wrunlock(&bo->lock);
return VK_SUCCESS;
}
static uint64_t
radv_amdgpu_canonicalize_va(uint64_t va)
{
@@ -231,10 +185,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
int range_count_delta, new_idx;
int first = 0, last;
struct radv_amdgpu_map_range new_first, new_last;
VkResult result;
int r;
assert(parent->base.is_virtual);
@@ -254,116 +204,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
/* Do not add the BO to the virtual BO list if it's already in the global list to avoid dangling
* BO references because it might have been destroyed without being previously unbound. Resetting
* it to NULL clears the old BO ranges if present.
*
* This is going to be clarified in the Vulkan spec:
* https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
*
* The issue still exists for non-global BOs but it will be addressed later, once we are 100% sure
* it's RADV's fault (mostly because the solution looks more complicated).
*/
if (bo && radv_buffer_is_resident(&bo->base)) {
bo = NULL;
bo_offset = 0;
}
/* We have at most 2 new ranges (one from the bind, and another one from splitting a range that
* contains the newly bound range). */
if (parent->range_capacity - parent->range_count < 2) {
uint32_t range_capacity = parent->range_capacity + 2;
struct radv_amdgpu_map_range *ranges =
realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
if (!ranges)
return VK_ERROR_OUT_OF_HOST_MEMORY;
parent->ranges = ranges;
parent->range_capacity = range_capacity;
}
/*
* [first, last] is exactly the window of ranges that either overlap the
* newly bound range, or are adjacent to it. This corresponds to the bind
* ranges that may change.
*/
while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
++first;
last = first;
while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
++last;
/* Whether the first or last range is going to be totally removed or just
* resized/left alone. Note that in the case of first == last, we will split
* this into a part before and after the new range. The remove flag then
* indicates that the corresponding split part should not be created. */
bool remove_first = parent->ranges[first].offset == offset;
bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
assert(parent->ranges[first].offset <= offset);
assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
/* Try to merge the new range with the first range. */
if (parent->ranges[first].bo == bo &&
(!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
size += offset - parent->ranges[first].offset;
offset = parent->ranges[first].offset;
bo_offset = parent->ranges[first].bo_offset;
remove_first = true;
}
/* Try to merge the new range with the last range. */
if (parent->ranges[last].bo == bo &&
(!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
size = parent->ranges[last].offset + parent->ranges[last].size - offset;
remove_last = true;
}
range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
new_idx = first + !remove_first;
/* If the first/last ranges are not left alone, we unmap them and optionally map
* them again after modifications. Note that this implicitly can do the splitting
* if first == last. */
new_first = parent->ranges[first];
new_last = parent->ranges[last];
if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
if (!remove_first) {
new_first.size = offset - new_first.offset;
}
}
if (parent->ranges[last].offset < offset + size || remove_last) {
if (!remove_last) {
new_last.size -= offset + size - new_last.offset;
new_last.bo_offset += (offset + size - new_last.offset);
new_last.offset = offset + size;
}
}
/* Moves the range list after last to account for the changed number of ranges. */
memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
if (!remove_first)
parent->ranges[first] = new_first;
if (!remove_last)
parent->ranges[new_idx + 1] = new_last;
/* Actually set up the new range. */
parent->ranges[new_idx].offset = offset;
parent->ranges[new_idx].size = size;
parent->ranges[new_idx].bo = bo;
parent->ranges[new_idx].bo_offset = bo_offset;
parent->range_count += range_count_delta;
result = radv_amdgpu_winsys_rebuild_bo_list(parent);
if (result != VK_SUCCESS)
return result;
return VK_SUCCESS;
}
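
For reference, a minimal standalone sketch (not RADV code; the struct, values and names are illustrative) of the splitting arithmetic the removed function performed, for the common case where a bind lands strictly inside a single existing range (first == last) and both split parts survive:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct range {
   uint64_t offset;
   uint64_t size;
};

int main(void)
{
   /* The parent VA space starts as a single unbound range [0, 64 KiB). */
   struct range first = {0, 65536};
   struct range last = first; /* first == last: the bind splits one range */

   /* The new bind: [4 KiB, 8 KiB). */
   uint64_t offset = 4096, size = 4096;

   /* Same conditions as remove_first/remove_last in the removed code. */
   int remove_first = (first.offset == offset);
   int remove_last = (last.offset + last.size == offset + size);
   assert(!remove_first && !remove_last); /* both split parts are kept */

   /* Same arithmetic as new_first/new_last in the removed code. */
   struct range new_first = first;
   struct range new_last = last;
   new_first.size = offset - new_first.offset;        /* [0, 4 KiB)       */
   new_last.size -= offset + size - new_last.offset;  /* 64 KiB - 8 KiB   */
   new_last.offset = offset + size;                   /* [8 KiB, 64 KiB)  */

   /* One new range replaces (last - first + 1) = 1 old one, plus the two
    * surviving split parts: net +2 ranges, i.e. [0,4K) [4K,8K) [8K,64K). */
   int range_count_delta = 1 - 1 + !remove_first + !remove_last;

   printf("new_first: [%" PRIu64 ", %" PRIu64 ")\n", new_first.offset,
          new_first.offset + new_first.size);
   printf("bound:     [%" PRIu64 ", %" PRIu64 ")\n", offset, offset + size);
   printf("new_last:  [%" PRIu64 ", %" PRIu64 ")\n", new_last.offset,
          new_last.offset + new_last.size);
   printf("range_count_delta: %d\n", range_count_delta);
   return 0;
}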
@@ -447,9 +287,6 @@ radv_amdgpu_winsys_virtual_bo_destroy(struct radeon_winsys *_ws, struct radeon_w
fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
}
free(bo->bos);
free(bo->ranges);
u_rwlock_destroy(&bo->lock);
ac_drm_va_range_free(bo->va_handle);
FREE(bo);
}
@@ -499,7 +336,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *bo;
struct radv_amdgpu_map_range *ranges = NULL;
uint64_t va = 0;
amdgpu_va_handle va_handle;
int r;
@@ -537,23 +373,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
bo->va_handle = va_handle;
bo->base.is_virtual = true;
ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
if (!ranges) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto error_ranges_alloc;
}
u_rwlock_init(&bo->lock);
bo->ranges = ranges;
bo->range_count = 1;
bo->range_capacity = 1;
bo->ranges[0].offset = 0;
bo->ranges[0].size = size;
bo->ranges[0].bo = NULL;
bo->ranges[0].bo_offset = 0;
/* Reserve a PRT VA region. */
r = radv_amdgpu_virtual_bo_init_mapping(ws, bo, va_size);
if (r) {


@@ -25,40 +25,16 @@ struct radv_amdgpu_winsys_bo_log {
uint8_t virtual_mapping : 1;
};
struct radv_amdgpu_map_range {
uint64_t offset;
uint64_t size;
struct radv_amdgpu_winsys_bo *bo;
uint64_t bo_offset;
};
struct radv_amdgpu_winsys_bo {
struct radeon_winsys_bo base;
amdgpu_va_handle va_handle;
uint32_t flags;
uint8_t priority;
-   union {
-      /* physical bo */
-      struct {
-         ac_drm_bo bo;
-         uint32_t bo_handle;
+   ac_drm_bo bo;
+   uint32_t bo_handle;
-         void *cpu_map;
-      };
-      /* virtual bo */
-      struct {
-         struct u_rwlock lock;
-         struct radv_amdgpu_map_range *ranges;
-         uint32_t range_count;
-         uint32_t range_capacity;
-         struct radv_amdgpu_winsys_bo **bos;
-         uint32_t bo_count;
-         uint32_t bo_capacity;
-      };
-   };
+   void *cpu_map;
};
static inline struct radv_amdgpu_winsys_bo *
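
With the union and all the virtual-BO state gone, the struct reduces to the following (reconstructed from the hunk above):

struct radv_amdgpu_winsys_bo {
   struct radeon_winsys_bo base;
   amdgpu_va_handle va_handle;

   uint32_t flags;
   uint8_t priority;

   ac_drm_bo bo;
   uint32_t bo_handle;
   void *cpu_map;
};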


@@ -40,8 +40,6 @@
/* Maximum allowed total number of submitted IBs. */
#define RADV_MAX_IBS_PER_SUBMIT 192
enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 };
struct radv_amdgpu_ib {
struct radeon_winsys_bo *bo; /* NULL when not owned by the current CS object */
uint64_t va;
@@ -79,11 +77,6 @@ struct radv_amdgpu_cs {
int buffer_hash_table[1024];
unsigned hw_ip;
unsigned num_virtual_buffers;
unsigned max_num_virtual_buffers;
struct radeon_winsys_bo **virtual_buffers;
int *virtual_buffer_hash_table;
struct hash_table *annotations;
};
@@ -193,8 +186,6 @@ radv_amdgpu_cs_destroy(struct ac_cmdbuf *rcs)
cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffers[i].bo);
free(cs->ib_buffers);
free(cs->virtual_buffers);
free(cs->virtual_buffer_hash_table);
free(cs->handles);
free(cs);
}
@@ -533,13 +524,7 @@ radv_amdgpu_cs_reset(struct ac_cmdbuf *_cs)
cs->buffer_hash_table[hash] = -1;
}
for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
cs->virtual_buffer_hash_table[hash] = -1;
}
cs->num_buffers = 0;
cs->num_virtual_buffers = 0;
/* When the CS is finalized and IBs are not allowed, use last IB. */
assert(cs->ib_buffer || cs->num_ib_buffers);
@@ -661,55 +646,6 @@ radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, uint32_t bo, uint8
++cs->num_buffers;
}
static void
radv_amdgpu_cs_add_virtual_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *bo)
{
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
if (!cs->virtual_buffer_hash_table) {
int *virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
if (!virtual_buffer_hash_table) {
cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
return;
}
cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
cs->virtual_buffer_hash_table[i] = -1;
}
if (cs->virtual_buffer_hash_table[hash] >= 0) {
int idx = cs->virtual_buffer_hash_table[hash];
if (cs->virtual_buffers[idx] == bo) {
return;
}
for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
if (cs->virtual_buffers[i] == bo) {
cs->virtual_buffer_hash_table[hash] = i;
return;
}
}
}
if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
unsigned max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
struct radeon_winsys_bo **virtual_buffers =
realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo *) * max_num_virtual_buffers);
if (!virtual_buffers) {
cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
return;
}
cs->max_num_virtual_buffers = max_num_virtual_buffers;
cs->virtual_buffers = virtual_buffers;
}
cs->virtual_buffers[cs->num_virtual_buffers] = bo;
cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
++cs->num_virtual_buffers;
}
static void
radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
{
@@ -719,10 +655,8 @@ radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
if (cs->status != VK_SUCCESS)
return;
-   if (bo->base.is_virtual) {
-      radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
+   if (bo->base.is_virtual)
      return;
-   }
radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
}
@@ -776,10 +710,6 @@ radv_amdgpu_cs_execute_secondary(struct ac_cmdbuf *_parent, struct ac_cmdbuf *_c
radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle, child->handles[i].bo_priority);
}
for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
}
if (use_ib2) {
radv_amdgpu_cs_emit_secondary_ib2(parent, child);
} else {
@@ -911,8 +841,6 @@ radv_amdgpu_count_cs_bo(struct radv_amdgpu_cs *start_cs)
for (struct radv_amdgpu_cs *cs = start_cs; cs; cs = cs->chained_to) {
num_bo += cs->num_buffers;
for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
num_bo += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
}
return num_bo;
@@ -936,7 +864,7 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
if (!cs->num_buffers)
return num_handles;
-   if (num_handles == 0 && !cs->num_virtual_buffers) {
+   if (num_handles == 0) {
memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
return cs->num_buffers;
}
@@ -955,26 +883,6 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
++num_handles;
}
}
for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
u_rwlock_rdlock(&virtual_bo->lock);
for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
bool found = false;
for (unsigned m = 0; m < num_handles; ++m) {
if (handles[m].bo_handle == bo->bo_handle) {
found = true;
break;
}
}
if (!found) {
handles[num_handles].bo_handle = bo->bo_handle;
handles[num_handles].bo_priority = bo->priority;
++num_handles;
}
}
u_rwlock_rdunlock(&virtual_bo->lock);
}
return num_handles;
}
@@ -1020,8 +928,7 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct ac_cmdbuf **cs_arr
num_handles = radv_amdgpu_copy_global_bo_list(ws, handles);
} else if (count == 1 && !num_initial_preambles && !num_continue_preambles && !num_postambles &&
-              !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !radv_amdgpu_cs(cs_array[0])->chained_to &&
-              !ws->global_bo_list.count) {
+              !radv_amdgpu_cs(cs_array[0])->chained_to && !ws->global_bo_list.count) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
if (cs->num_buffers == 0)
return VK_SUCCESS;