From fb195bd6bd82c5a887b9d378b74aab37a1ae6bdd Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Tue, 17 Mar 2026 14:31:48 +0100
Subject: [PATCH] radv/amdgpu: remove the virtual BO tracking logic

All BOs allocated by applications are local BOs, so the virtual BO
tracking logic can be removed completely.

Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 181 ------------------
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h |  30 +--
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c |  99 +---------
 3 files changed, 6 insertions(+), 304 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index a060f2fe444..dc1e472a07e 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -80,52 +80,6 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
    return r;
 }
 
-static int
-bo_comparator(const void *ap, const void *bp)
-{
-   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
-   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
-   return (a > b) ? 1 : (a < b) ? -1 : 0;
-}
-
-static VkResult
-radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
-{
-   u_rwlock_wrlock(&bo->lock);
-
-   if (bo->bo_capacity < bo->range_count) {
-      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
-      struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
-      if (!bos) {
-         u_rwlock_wrunlock(&bo->lock);
-         return VK_ERROR_OUT_OF_HOST_MEMORY;
-      }
-      bo->bos = bos;
-      bo->bo_capacity = new_count;
-   }
-
-   uint32_t temp_bo_count = 0;
-   for (uint32_t i = 0; i < bo->range_count; ++i)
-      if (bo->ranges[i].bo)
-         bo->bos[temp_bo_count++] = bo->ranges[i].bo;
-
-   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
-
-   if (!temp_bo_count) {
-      bo->bo_count = 0;
-   } else {
-      uint32_t final_bo_count = 1;
-      for (uint32_t i = 1; i < temp_bo_count; ++i)
-         if (bo->bos[i] != bo->bos[i - 1])
-            bo->bos[final_bo_count++] = bo->bos[i];
-
-      bo->bo_count = final_bo_count;
-   }
-
-   u_rwlock_wrunlock(&bo->lock);
-   return VK_SUCCESS;
-}
-
 static uint64_t
 radv_amdgpu_canonicalize_va(uint64_t va)
 {
@@ -231,10 +185,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
    struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
-   int range_count_delta, new_idx;
-   int first = 0, last;
-   struct radv_amdgpu_map_range new_first, new_last;
-   VkResult result;
    int r;
 
    assert(parent->base.is_virtual);
@@ -254,116 +204,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
    }
 
-   /* Do not add the BO to the virtual BO list if it's already in the global list to avoid dangling
-    * BO references because it might have been destroyed without being previously unbound. Resetting
-    * it to NULL clears the old BO ranges if present.
-    *
-    * This is going to be clarified in the Vulkan spec:
-    * https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
-    *
-    * The issue still exists for non-global BO but it will be addressed later, once we are 100% it's
-    * RADV fault (mostly because the solution looks more complicated).
-    */
-   if (bo && radv_buffer_is_resident(&bo->base)) {
-      bo = NULL;
-      bo_offset = 0;
-   }
-
-   /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
-    * contains the newly bound range). */
-   if (parent->range_capacity - parent->range_count < 2) {
-      uint32_t range_capacity = parent->range_capacity + 2;
-      struct radv_amdgpu_map_range *ranges =
-         realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
-      if (!ranges)
-         return VK_ERROR_OUT_OF_HOST_MEMORY;
-      parent->ranges = ranges;
-      parent->range_capacity = range_capacity;
-   }
-
-   /*
-    * [first, last] is exactly the range of ranges that either overlap the
-    * new parent, or are adjacent to it. This corresponds to the bind ranges
-    * that may change.
-    */
-   while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
-      ++first;
-
-   last = first;
-   while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
-      ++last;
-
-   /* Whether the first or last range are going to be totally removed or just
-    * resized/left alone. Note that in the case of first == last, we will split
-    * this into a part before and after the new range. The remove flag is then
-    * whether to not create the corresponding split part. */
-   bool remove_first = parent->ranges[first].offset == offset;
-   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
-
-   assert(parent->ranges[first].offset <= offset);
-   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
-
-   /* Try to merge the new range with the first range. */
-   if (parent->ranges[first].bo == bo &&
-       (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
-      size += offset - parent->ranges[first].offset;
-      offset = parent->ranges[first].offset;
-      bo_offset = parent->ranges[first].bo_offset;
-      remove_first = true;
-   }
-
-   /* Try to merge the new range with the last range. */
-   if (parent->ranges[last].bo == bo &&
-       (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
-      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
-      remove_last = true;
-   }
-
-   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
-   new_idx = first + !remove_first;
-
-   /* If the first/last range are not left alone we unmap then and optionally map
-    * them again after modifications. Not that this implicitly can do the splitting
-    * if first == last. */
-   new_first = parent->ranges[first];
-   new_last = parent->ranges[last];
-
-   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
-      if (!remove_first) {
-         new_first.size = offset - new_first.offset;
-      }
-   }
-
-   if (parent->ranges[last].offset < offset + size || remove_last) {
-      if (!remove_last) {
-         new_last.size -= offset + size - new_last.offset;
-         new_last.bo_offset += (offset + size - new_last.offset);
-         new_last.offset = offset + size;
-      }
-   }
-
-   /* Moves the range list after last to account for the changed number of ranges. */
-   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
-           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
-
-   if (!remove_first)
-      parent->ranges[first] = new_first;
-
-   if (!remove_last)
-      parent->ranges[new_idx + 1] = new_last;
-
-   /* Actually set up the new range. */
-   parent->ranges[new_idx].offset = offset;
-   parent->ranges[new_idx].size = size;
-   parent->ranges[new_idx].bo = bo;
-   parent->ranges[new_idx].bo_offset = bo_offset;
-
-   parent->range_count += range_count_delta;
-
-   result = radv_amdgpu_winsys_rebuild_bo_list(parent);
-   if (result != VK_SUCCESS)
-      return result;
-
    return VK_SUCCESS;
 }
 
@@ -447,9 +287,6 @@ radv_amdgpu_winsys_virtual_bo_destroy(struct radeon_winsys *_ws, struct radeon_w
       fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
    }
 
-   free(bo->bos);
-   free(bo->ranges);
-   u_rwlock_destroy(&bo->lock);
    ac_drm_va_range_free(bo->va_handle);
    FREE(bo);
 }
@@ -499,7 +336,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
 {
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_winsys_bo *bo;
-   struct radv_amdgpu_map_range *ranges = NULL;
    uint64_t va = 0;
    amdgpu_va_handle va_handle;
    int r;
@@ -537,23 +373,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
    bo->va_handle = va_handle;
    bo->base.is_virtual = true;
 
-   ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
-   if (!ranges) {
-      result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      goto error_ranges_alloc;
-   }
-
-   u_rwlock_init(&bo->lock);
-
-   bo->ranges = ranges;
-   bo->range_count = 1;
-   bo->range_capacity = 1;
-
-   bo->ranges[0].offset = 0;
-   bo->ranges[0].size = size;
-   bo->ranges[0].bo = NULL;
-   bo->ranges[0].bo_offset = 0;
-
    /* Reserve a PRT VA region. */
    r = radv_amdgpu_virtual_bo_init_mapping(ws, bo, va_size);
    if (r) {
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
index c2be7ce9777..86f83fdf93e 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
@@ -25,40 +25,16 @@ struct radv_amdgpu_winsys_bo_log {
    uint8_t virtual_mapping : 1;
 };
 
-struct radv_amdgpu_map_range {
-   uint64_t offset;
-   uint64_t size;
-   struct radv_amdgpu_winsys_bo *bo;
-   uint64_t bo_offset;
-};
-
 struct radv_amdgpu_winsys_bo {
    struct radeon_winsys_bo base;
    amdgpu_va_handle va_handle;
 
    uint32_t flags;
    uint8_t priority;
 
-   union {
-      /* physical bo */
-      struct {
-         ac_drm_bo bo;
-         uint32_t bo_handle;
+   ac_drm_bo bo;
+   uint32_t bo_handle;
 
-         void *cpu_map;
-      };
-      /* virtual bo */
-      struct {
-         struct u_rwlock lock;
-
-         struct radv_amdgpu_map_range *ranges;
-         uint32_t range_count;
-         uint32_t range_capacity;
-
-         struct radv_amdgpu_winsys_bo **bos;
-         uint32_t bo_count;
-         uint32_t bo_capacity;
-      };
-   };
+   void *cpu_map;
 };
 
 static inline struct radv_amdgpu_winsys_bo *
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index fe71bd51928..541115701b7 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -40,8 +40,6 @@
 /* Maximum allowed total number of submitted IBs. */
 #define RADV_MAX_IBS_PER_SUBMIT 192
 
-enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 };
-
 struct radv_amdgpu_ib {
    struct radeon_winsys_bo *bo; /* NULL when not owned by the current CS object */
    uint64_t va;
@@ -79,11 +77,6 @@ struct radv_amdgpu_cs {
    int buffer_hash_table[1024];
    unsigned hw_ip;
 
-   unsigned num_virtual_buffers;
-   unsigned max_num_virtual_buffers;
-   struct radeon_winsys_bo **virtual_buffers;
-   int *virtual_buffer_hash_table;
-
    struct hash_table *annotations;
 };
 
@@ -193,8 +186,6 @@ radv_amdgpu_cs_destroy(struct ac_cmdbuf *rcs)
       cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffers[i].bo);
 
    free(cs->ib_buffers);
-   free(cs->virtual_buffers);
-   free(cs->virtual_buffer_hash_table);
    free(cs->handles);
    free(cs);
 }
@@ -533,13 +524,7 @@ radv_amdgpu_cs_reset(struct ac_cmdbuf *_cs)
       cs->buffer_hash_table[hash] = -1;
    }
 
-   for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
-      unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
-      cs->virtual_buffer_hash_table[hash] = -1;
-   }
-
    cs->num_buffers = 0;
-   cs->num_virtual_buffers = 0;
 
    /* When the CS is finalized and IBs are not allowed, use last IB. */
    assert(cs->ib_buffer || cs->num_ib_buffers);
@@ -661,55 +646,6 @@ radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, uint32_t bo, uint8
    ++cs->num_buffers;
 }
 
-static void
-radv_amdgpu_cs_add_virtual_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *bo)
-{
-   struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
-   unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
-
-   if (!cs->virtual_buffer_hash_table) {
-      int *virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
-      if (!virtual_buffer_hash_table) {
-         cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
-         return;
-      }
-      cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
-
-      for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
-         cs->virtual_buffer_hash_table[i] = -1;
-   }
-
-   if (cs->virtual_buffer_hash_table[hash] >= 0) {
-      int idx = cs->virtual_buffer_hash_table[hash];
-      if (cs->virtual_buffers[idx] == bo) {
-         return;
-      }
-      for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
-         if (cs->virtual_buffers[i] == bo) {
-            cs->virtual_buffer_hash_table[hash] = i;
-            return;
-         }
-      }
-   }
-
-   if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
-      unsigned max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
-      struct radeon_winsys_bo **virtual_buffers =
-         realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo *) * max_num_virtual_buffers);
-      if (!virtual_buffers) {
-         cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
-         return;
-      }
-      cs->max_num_virtual_buffers = max_num_virtual_buffers;
-      cs->virtual_buffers = virtual_buffers;
-   }
-
-   cs->virtual_buffers[cs->num_virtual_buffers] = bo;
-
-   cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
-   ++cs->num_virtual_buffers;
-}
-
 static void
 radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
 {
@@ -719,10 +655,8 @@ radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
    if (cs->status != VK_SUCCESS)
       return;
 
-   if (bo->base.is_virtual) {
-      radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
+   if (bo->base.is_virtual)
       return;
-   }
 
    radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
 }
@@ -776,10 +710,6 @@ radv_amdgpu_cs_execute_secondary(struct ac_cmdbuf *_parent, struct ac_cmdbuf *_c
       radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle, child->handles[i].bo_priority);
    }
 
-   for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
-      radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
-   }
-
    if (use_ib2) {
       radv_amdgpu_cs_emit_secondary_ib2(parent, child);
    } else {
@@ -911,8 +841,6 @@ radv_amdgpu_count_cs_bo(struct radv_amdgpu_cs *start_cs)
 
    for (struct radv_amdgpu_cs *cs = start_cs; cs; cs = cs->chained_to) {
      num_bo += cs->num_buffers;
-      for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
-         num_bo += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
   }
 
    return num_bo;
@@ -936,7 +864,7 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
    if (!cs->num_buffers)
       return num_handles;
 
-   if (num_handles == 0 && !cs->num_virtual_buffers) {
+   if (num_handles == 0) {
      memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
      return cs->num_buffers;
   }
@@ -955,26 +883,6 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
          ++num_handles;
      }
   }
-   for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
-      struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
-      u_rwlock_rdlock(&virtual_bo->lock);
-      for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
-         struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
-         bool found = false;
-         for (unsigned m = 0; m < num_handles; ++m) {
-            if (handles[m].bo_handle == bo->bo_handle) {
-               found = true;
-               break;
-            }
-         }
-         if (!found) {
-            handles[num_handles].bo_handle = bo->bo_handle;
-            handles[num_handles].bo_priority = bo->priority;
-            ++num_handles;
-         }
-      }
-      u_rwlock_rdunlock(&virtual_bo->lock);
-   }
 
    return num_handles;
 }
@@ -1020,8 +928,7 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct ac_cmdbuf **cs_arr
       num_handles = radv_amdgpu_copy_global_bo_list(ws, handles);
    } else if (count == 1 && !num_initial_preambles && !num_continue_preambles && !num_postambles &&
-              !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !radv_amdgpu_cs(cs_array[0])->chained_to &&
-              !ws->global_bo_list.count) {
+              !radv_amdgpu_cs(cs_array[0])->chained_to && !ws->global_bo_list.count) {
      struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
 
      if (cs->num_buffers == 0)
        return VK_SUCCESS;
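
The net effect on buffer tracking: radv_amdgpu_cs_add_buffer() now simply ignores
virtual BOs, because every BO backing a sparse mapping is an application allocation
that is already resident through the winsys global BO list (as the removed comment
and the commit message note). Below is a minimal standalone sketch of that invariant;
toy_bo, toy_cs and toy_cs_add_buffer are hypothetical stand-ins for illustration
only, not the real winsys types:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins for the winsys BO and CS types (hypothetical). */
struct toy_bo {
   uint32_t handle;
   bool is_virtual; /* sparse/PRT reservation with no storage of its own */
};

struct toy_cs {
   uint32_t handles[16];
   unsigned num_handles;
};

/* Models the post-patch behaviour: virtual BOs need no per-CS tracking
 * because their backing BOs are assumed resident via the global list. */
static void
toy_cs_add_buffer(struct toy_cs *cs, const struct toy_bo *bo)
{
   if (bo->is_virtual)
      return;

   for (unsigned i = 0; i < cs->num_handles; i++)
      if (cs->handles[i] == bo->handle)
         return; /* already tracked, keep the list deduplicated */

   cs->handles[cs->num_handles++] = bo->handle;
}

int
main(void)
{
   struct toy_cs cs = {0};
   struct toy_bo sparse = {.handle = 1, .is_virtual = true};
   struct toy_bo plain = {.handle = 2, .is_virtual = false};

   toy_cs_add_buffer(&cs, &sparse); /* skipped: assumed globally resident */
   toy_cs_add_buffer(&cs, &plain);
   toy_cs_add_buffer(&cs, &plain);  /* deduplicated */

   printf("per-CS handles tracked: %u\n", cs.num_handles); /* prints 1 */
   return 0;
}

Compiled with cc -Wall, this prints "per-CS handles tracked: 1": the sparse BO
contributes nothing to the per-CS list, which is exactly why the range/BO-list
machinery deleted above is no longer needed.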