radv/amdgpu: remove the virtual BOs tracking logic

All BOs allocated by applications are local BOs, which the kernel keeps
valid without per-submission BO lists, so the logic that tracks the backing
BOs of virtual (sparse) BOs can be removed completely.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40466>
Samuel Pitoiset 2026-03-17 14:31:48 +01:00 committed by Marge Bot
parent 2cf84eedb9
commit fb195bd6bd
3 changed files with 6 additions and 304 deletions


@@ -80,52 +80,6 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
return r;
}
static int
bo_comparator(const void *ap, const void *bp)
{
struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
return (a > b) ? 1 : (a < b) ? -1 : 0;
}
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
u_rwlock_wrlock(&bo->lock);
if (bo->bo_capacity < bo->range_count) {
uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
if (!bos) {
u_rwlock_wrunlock(&bo->lock);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
bo->bos = bos;
bo->bo_capacity = new_count;
}
uint32_t temp_bo_count = 0;
for (uint32_t i = 0; i < bo->range_count; ++i)
if (bo->ranges[i].bo)
bo->bos[temp_bo_count++] = bo->ranges[i].bo;
qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
if (!temp_bo_count) {
bo->bo_count = 0;
} else {
uint32_t final_bo_count = 1;
for (uint32_t i = 1; i < temp_bo_count; ++i)
if (bo->bos[i] != bo->bos[i - 1])
bo->bos[final_bo_count++] = bo->bos[i];
bo->bo_count = final_bo_count;
}
u_rwlock_wrunlock(&bo->lock);
return VK_SUCCESS;
}
static uint64_t
radv_amdgpu_canonicalize_va(uint64_t va)
{
@@ -231,10 +185,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
int range_count_delta, new_idx;
int first = 0, last;
struct radv_amdgpu_map_range new_first, new_last;
VkResult result;
int r;
assert(parent->base.is_virtual);
@@ -254,116 +204,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
/* Do not add the BO to the virtual BO list if it's already in the global list to avoid dangling
* BO references because it might have been destroyed without being previously unbound. Resetting
* it to NULL clears the old BO ranges if present.
*
* This is going to be clarified in the Vulkan spec:
* https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
*
* The issue still exists for non-global BOs but it will be addressed later, once we are 100% sure
* it's RADV's fault (mostly because the solution looks more complicated).
*/
if (bo && radv_buffer_is_resident(&bo->base)) {
bo = NULL;
bo_offset = 0;
}
/* We have at most 2 new ranges (one from the bind, and another one from splitting a range that
* contains the newly bound range). */
if (parent->range_capacity - parent->range_count < 2) {
uint32_t range_capacity = parent->range_capacity + 2;
struct radv_amdgpu_map_range *ranges =
realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
if (!ranges)
return VK_ERROR_OUT_OF_HOST_MEMORY;
parent->ranges = ranges;
parent->range_capacity = range_capacity;
}
/*
* [first, last] is exactly the window of ranges that either overlap the
* newly bound range, or are adjacent to it. This corresponds to the bind
* ranges that may change.
*/
while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
++first;
last = first;
while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
++last;
/* Whether the first or last range is going to be totally removed or just
* resized/left alone. Note that in the case of first == last, we will split
* this into a part before and after the new range. The remove flag then
* indicates that the corresponding split part should not be created. */
bool remove_first = parent->ranges[first].offset == offset;
bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
assert(parent->ranges[first].offset <= offset);
assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
/* Try to merge the new range with the first range. */
if (parent->ranges[first].bo == bo &&
(!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
size += offset - parent->ranges[first].offset;
offset = parent->ranges[first].offset;
bo_offset = parent->ranges[first].bo_offset;
remove_first = true;
}
/* Try to merge the new range with the last range. */
if (parent->ranges[last].bo == bo &&
(!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
size = parent->ranges[last].offset + parent->ranges[last].size - offset;
remove_last = true;
}
range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
new_idx = first + !remove_first;
/* If the first/last ranges are not left alone, we unmap them and optionally map
* them again after modifications. Note that this implicitly can do the splitting
* if first == last. */
new_first = parent->ranges[first];
new_last = parent->ranges[last];
if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
if (!remove_first) {
new_first.size = offset - new_first.offset;
}
}
if (parent->ranges[last].offset < offset + size || remove_last) {
if (!remove_last) {
new_last.size -= offset + size - new_last.offset;
new_last.bo_offset += (offset + size - new_last.offset);
new_last.offset = offset + size;
}
}
/* Moves the range list after last to account for the changed number of ranges. */
memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
if (!remove_first)
parent->ranges[first] = new_first;
if (!remove_last)
parent->ranges[new_idx + 1] = new_last;
/* Actually set up the new range. */
parent->ranges[new_idx].offset = offset;
parent->ranges[new_idx].size = size;
parent->ranges[new_idx].bo = bo;
parent->ranges[new_idx].bo_offset = bo_offset;
parent->range_count += range_count_delta;
result = radv_amdgpu_winsys_rebuild_bo_list(parent);
if (result != VK_SUCCESS)
return result;
return VK_SUCCESS;
}
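
For reference, a minimal standalone sketch (not RADV code; the struct, values and names are illustrative) of the splitting arithmetic the removed function performed, for the common case where a bind lands strictly inside a single existing range (first == last) and both split parts survive:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct range {
   uint64_t offset;
   uint64_t size;
};

int main(void)
{
   /* The parent VA space starts as a single unbound range [0, 64 KiB). */
   struct range first = {0, 65536};
   struct range last = first; /* first == last: the bind splits one range */

   /* The new bind: [4 KiB, 8 KiB). */
   uint64_t offset = 4096, size = 4096;

   /* Same conditions as remove_first/remove_last in the removed code. */
   int remove_first = (first.offset == offset);
   int remove_last = (last.offset + last.size == offset + size);
   assert(!remove_first && !remove_last); /* both split parts are kept */

   /* Same arithmetic as new_first/new_last in the removed code. */
   struct range new_first = first;
   struct range new_last = last;
   new_first.size = offset - new_first.offset;        /* [0, 4 KiB)       */
   new_last.size -= offset + size - new_last.offset;  /* 64 KiB - 8 KiB   */
   new_last.offset = offset + size;                   /* [8 KiB, 64 KiB)  */

   /* One new range replaces (last - first + 1) = 1 old one, plus the two
    * surviving split parts: net +2 ranges, i.e. [0,4K) [4K,8K) [8K,64K). */
   int range_count_delta = 1 - 1 + !remove_first + !remove_last;

   printf("new_first: [%" PRIu64 ", %" PRIu64 ")\n", new_first.offset,
          new_first.offset + new_first.size);
   printf("bound:     [%" PRIu64 ", %" PRIu64 ")\n", offset, offset + size);
   printf("new_last:  [%" PRIu64 ", %" PRIu64 ")\n", new_last.offset,
          new_last.offset + new_last.size);
   printf("range_count_delta: %d\n", range_count_delta);
   return 0;
}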
@@ -447,9 +287,6 @@ radv_amdgpu_winsys_virtual_bo_destroy(struct radeon_winsys *_ws, struct radeon_w
fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
}
free(bo->bos);
free(bo->ranges);
u_rwlock_destroy(&bo->lock);
ac_drm_va_range_free(bo->va_handle);
FREE(bo);
}
@@ -499,7 +336,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *bo;
struct radv_amdgpu_map_range *ranges = NULL;
uint64_t va = 0;
amdgpu_va_handle va_handle;
int r;
@@ -537,23 +373,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
bo->va_handle = va_handle;
bo->base.is_virtual = true;
ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
if (!ranges) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto error_ranges_alloc;
}
u_rwlock_init(&bo->lock);
bo->ranges = ranges;
bo->range_count = 1;
bo->range_capacity = 1;
bo->ranges[0].offset = 0;
bo->ranges[0].size = size;
bo->ranges[0].bo = NULL;
bo->ranges[0].bo_offset = 0;
/* Reserve a PRT VA region. */
r = radv_amdgpu_virtual_bo_init_mapping(ws, bo, va_size);
if (r) {


@@ -25,40 +25,16 @@ struct radv_amdgpu_winsys_bo_log {
uint8_t virtual_mapping : 1;
};
struct radv_amdgpu_map_range {
uint64_t offset;
uint64_t size;
struct radv_amdgpu_winsys_bo *bo;
uint64_t bo_offset;
};
struct radv_amdgpu_winsys_bo {
struct radeon_winsys_bo base;
amdgpu_va_handle va_handle;
uint32_t flags;
uint8_t priority;
-   union {
-      /* physical bo */
-      struct {
-         ac_drm_bo bo;
-         uint32_t bo_handle;
+   ac_drm_bo bo;
+   uint32_t bo_handle;
-         void *cpu_map;
-      };
-      /* virtual bo */
-      struct {
-         struct u_rwlock lock;
-         struct radv_amdgpu_map_range *ranges;
-         uint32_t range_count;
-         uint32_t range_capacity;
-         struct radv_amdgpu_winsys_bo **bos;
-         uint32_t bo_count;
-         uint32_t bo_capacity;
-      };
-   };
+   void *cpu_map;
};
static inline struct radv_amdgpu_winsys_bo *
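
With the union and all the virtual-BO state gone, the struct reduces to the following (reconstructed from the hunk above):

struct radv_amdgpu_winsys_bo {
   struct radeon_winsys_bo base;
   amdgpu_va_handle va_handle;

   uint32_t flags;
   uint8_t priority;

   ac_drm_bo bo;
   uint32_t bo_handle;
   void *cpu_map;
};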


@@ -40,8 +40,6 @@
/* Maximum allowed total number of submitted IBs. */
#define RADV_MAX_IBS_PER_SUBMIT 192
enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 };
struct radv_amdgpu_ib {
struct radeon_winsys_bo *bo; /* NULL when not owned by the current CS object */
uint64_t va;
@@ -79,11 +77,6 @@ struct radv_amdgpu_cs {
int buffer_hash_table[1024];
unsigned hw_ip;
unsigned num_virtual_buffers;
unsigned max_num_virtual_buffers;
struct radeon_winsys_bo **virtual_buffers;
int *virtual_buffer_hash_table;
struct hash_table *annotations;
};
@@ -193,8 +186,6 @@ radv_amdgpu_cs_destroy(struct ac_cmdbuf *rcs)
cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffers[i].bo);
free(cs->ib_buffers);
free(cs->virtual_buffers);
free(cs->virtual_buffer_hash_table);
free(cs->handles);
free(cs);
}
@@ -533,13 +524,7 @@ radv_amdgpu_cs_reset(struct ac_cmdbuf *_cs)
cs->buffer_hash_table[hash] = -1;
}
for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
cs->virtual_buffer_hash_table[hash] = -1;
}
cs->num_buffers = 0;
cs->num_virtual_buffers = 0;
/* When the CS is finalized and IBs are not allowed, use last IB. */
assert(cs->ib_buffer || cs->num_ib_buffers);
@@ -661,55 +646,6 @@ radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, uint32_t bo, uint8
++cs->num_buffers;
}
static void
radv_amdgpu_cs_add_virtual_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *bo)
{
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
if (!cs->virtual_buffer_hash_table) {
int *virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
if (!virtual_buffer_hash_table) {
cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
return;
}
cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
cs->virtual_buffer_hash_table[i] = -1;
}
if (cs->virtual_buffer_hash_table[hash] >= 0) {
int idx = cs->virtual_buffer_hash_table[hash];
if (cs->virtual_buffers[idx] == bo) {
return;
}
for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
if (cs->virtual_buffers[i] == bo) {
cs->virtual_buffer_hash_table[hash] = i;
return;
}
}
}
if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
unsigned max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
struct radeon_winsys_bo **virtual_buffers =
realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo *) * max_num_virtual_buffers);
if (!virtual_buffers) {
cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
return;
}
cs->max_num_virtual_buffers = max_num_virtual_buffers;
cs->virtual_buffers = virtual_buffers;
}
cs->virtual_buffers[cs->num_virtual_buffers] = bo;
cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
++cs->num_virtual_buffers;
}
static void
radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
{
@@ -719,10 +655,8 @@ radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
if (cs->status != VK_SUCCESS)
return;
-   if (bo->base.is_virtual) {
-      radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
+   if (bo->base.is_virtual)
      return;
-   }
radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
}
@@ -776,10 +710,6 @@ radv_amdgpu_cs_execute_secondary(struct ac_cmdbuf *_parent, struct ac_cmdbuf *_c
radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle, child->handles[i].bo_priority);
}
for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
}
if (use_ib2) {
radv_amdgpu_cs_emit_secondary_ib2(parent, child);
} else {
@@ -911,8 +841,6 @@ radv_amdgpu_count_cs_bo(struct radv_amdgpu_cs *start_cs)
for (struct radv_amdgpu_cs *cs = start_cs; cs; cs = cs->chained_to) {
num_bo += cs->num_buffers;
for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
num_bo += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
}
return num_bo;
@@ -936,7 +864,7 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
if (!cs->num_buffers)
return num_handles;
-   if (num_handles == 0 && !cs->num_virtual_buffers) {
+   if (num_handles == 0) {
memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
return cs->num_buffers;
}
@@ -955,26 +883,6 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
++num_handles;
}
}
for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
u_rwlock_rdlock(&virtual_bo->lock);
for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
bool found = false;
for (unsigned m = 0; m < num_handles; ++m) {
if (handles[m].bo_handle == bo->bo_handle) {
found = true;
break;
}
}
if (!found) {
handles[num_handles].bo_handle = bo->bo_handle;
handles[num_handles].bo_priority = bo->priority;
++num_handles;
}
}
u_rwlock_rdunlock(&virtual_bo->lock);
}
return num_handles;
}
@@ -1020,8 +928,7 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct ac_cmdbuf **cs_arr
num_handles = radv_amdgpu_copy_global_bo_list(ws, handles);
} else if (count == 1 && !num_initial_preambles && !num_continue_preambles && !num_postambles &&
-              !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !radv_amdgpu_cs(cs_array[0])->chained_to &&
-              !ws->global_bo_list.count) {
+              !radv_amdgpu_cs(cs_array[0])->chained_to && !ws->global_bo_list.count) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
if (cs->num_buffers == 0)
return VK_SUCCESS;