mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
radv/amdgpu: remove the virtual BOs tracking logic
All BOs allocated by applications are local BOs, so this can be removed completely. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40466>
This commit is contained in:
parent
2cf84eedb9
commit
fb195bd6bd
3 changed files with 6 additions and 304 deletions
|
|
@ -80,52 +80,6 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, uint32_t bo_handle, uint64_t
|
|||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
bo_comparator(const void *ap, const void *bp)
|
||||
{
|
||||
struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
|
||||
struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
|
||||
return (a > b) ? 1 : (a < b) ? -1 : 0;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
|
||||
{
|
||||
u_rwlock_wrlock(&bo->lock);
|
||||
|
||||
if (bo->bo_capacity < bo->range_count) {
|
||||
uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
|
||||
struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
|
||||
if (!bos) {
|
||||
u_rwlock_wrunlock(&bo->lock);
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
bo->bos = bos;
|
||||
bo->bo_capacity = new_count;
|
||||
}
|
||||
|
||||
uint32_t temp_bo_count = 0;
|
||||
for (uint32_t i = 0; i < bo->range_count; ++i)
|
||||
if (bo->ranges[i].bo)
|
||||
bo->bos[temp_bo_count++] = bo->ranges[i].bo;
|
||||
|
||||
qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
|
||||
|
||||
if (!temp_bo_count) {
|
||||
bo->bo_count = 0;
|
||||
} else {
|
||||
uint32_t final_bo_count = 1;
|
||||
for (uint32_t i = 1; i < temp_bo_count; ++i)
|
||||
if (bo->bos[i] != bo->bos[i - 1])
|
||||
bo->bos[final_bo_count++] = bo->bos[i];
|
||||
|
||||
bo->bo_count = final_bo_count;
|
||||
}
|
||||
|
||||
u_rwlock_wrunlock(&bo->lock);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
radv_amdgpu_canonicalize_va(uint64_t va)
|
||||
{
|
||||
|
|
@ -231,10 +185,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
|
|||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
|
||||
struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
|
||||
struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
|
||||
int range_count_delta, new_idx;
|
||||
int first = 0, last;
|
||||
struct radv_amdgpu_map_range new_first, new_last;
|
||||
VkResult result;
|
||||
int r;
|
||||
|
||||
assert(parent->base.is_virtual);
|
||||
|
|
@ -254,116 +204,6 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_wins
|
|||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
/* Do not add the BO to the virtual BO list if it's already in the global list to avoid dangling
|
||||
* BO references because it might have been destroyed without being previously unbound. Resetting
|
||||
* it to NULL clears the old BO ranges if present.
|
||||
*
|
||||
* This is going to be clarified in the Vulkan spec:
|
||||
* https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
|
||||
*
|
||||
* The issue still exists for non-global BO but it will be addressed later, once we are 100% it's
|
||||
* RADV fault (mostly because the solution looks more complicated).
|
||||
*/
|
||||
if (bo && radv_buffer_is_resident(&bo->base)) {
|
||||
bo = NULL;
|
||||
bo_offset = 0;
|
||||
}
|
||||
|
||||
/* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
|
||||
* contains the newly bound range). */
|
||||
if (parent->range_capacity - parent->range_count < 2) {
|
||||
uint32_t range_capacity = parent->range_capacity + 2;
|
||||
struct radv_amdgpu_map_range *ranges =
|
||||
realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
|
||||
if (!ranges)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
parent->ranges = ranges;
|
||||
parent->range_capacity = range_capacity;
|
||||
}
|
||||
|
||||
/*
|
||||
* [first, last] is exactly the range of ranges that either overlap the
|
||||
* new parent, or are adjacent to it. This corresponds to the bind ranges
|
||||
* that may change.
|
||||
*/
|
||||
while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
|
||||
++first;
|
||||
|
||||
last = first;
|
||||
while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
|
||||
++last;
|
||||
|
||||
/* Whether the first or last range are going to be totally removed or just
|
||||
* resized/left alone. Note that in the case of first == last, we will split
|
||||
* this into a part before and after the new range. The remove flag is then
|
||||
* whether to not create the corresponding split part. */
|
||||
bool remove_first = parent->ranges[first].offset == offset;
|
||||
bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
|
||||
|
||||
assert(parent->ranges[first].offset <= offset);
|
||||
assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
|
||||
|
||||
/* Try to merge the new range with the first range. */
|
||||
if (parent->ranges[first].bo == bo &&
|
||||
(!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
|
||||
size += offset - parent->ranges[first].offset;
|
||||
offset = parent->ranges[first].offset;
|
||||
bo_offset = parent->ranges[first].bo_offset;
|
||||
remove_first = true;
|
||||
}
|
||||
|
||||
/* Try to merge the new range with the last range. */
|
||||
if (parent->ranges[last].bo == bo &&
|
||||
(!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
|
||||
size = parent->ranges[last].offset + parent->ranges[last].size - offset;
|
||||
remove_last = true;
|
||||
}
|
||||
|
||||
range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
|
||||
new_idx = first + !remove_first;
|
||||
|
||||
/* If the first/last range are not left alone we unmap then and optionally map
|
||||
* them again after modifications. Not that this implicitly can do the splitting
|
||||
* if first == last. */
|
||||
new_first = parent->ranges[first];
|
||||
new_last = parent->ranges[last];
|
||||
|
||||
if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
|
||||
if (!remove_first) {
|
||||
new_first.size = offset - new_first.offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (parent->ranges[last].offset < offset + size || remove_last) {
|
||||
if (!remove_last) {
|
||||
new_last.size -= offset + size - new_last.offset;
|
||||
new_last.bo_offset += (offset + size - new_last.offset);
|
||||
new_last.offset = offset + size;
|
||||
}
|
||||
}
|
||||
|
||||
/* Moves the range list after last to account for the changed number of ranges. */
|
||||
memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
|
||||
sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
|
||||
|
||||
if (!remove_first)
|
||||
parent->ranges[first] = new_first;
|
||||
|
||||
if (!remove_last)
|
||||
parent->ranges[new_idx + 1] = new_last;
|
||||
|
||||
/* Actually set up the new range. */
|
||||
parent->ranges[new_idx].offset = offset;
|
||||
parent->ranges[new_idx].size = size;
|
||||
parent->ranges[new_idx].bo = bo;
|
||||
parent->ranges[new_idx].bo_offset = bo_offset;
|
||||
|
||||
parent->range_count += range_count_delta;
|
||||
|
||||
result = radv_amdgpu_winsys_rebuild_bo_list(parent);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -447,9 +287,6 @@ radv_amdgpu_winsys_virtual_bo_destroy(struct radeon_winsys *_ws, struct radeon_w
|
|||
fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
|
||||
}
|
||||
|
||||
free(bo->bos);
|
||||
free(bo->ranges);
|
||||
u_rwlock_destroy(&bo->lock);
|
||||
ac_drm_va_range_free(bo->va_handle);
|
||||
FREE(bo);
|
||||
}
|
||||
|
|
@ -499,7 +336,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
|
|||
{
|
||||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
|
||||
struct radv_amdgpu_winsys_bo *bo;
|
||||
struct radv_amdgpu_map_range *ranges = NULL;
|
||||
uint64_t va = 0;
|
||||
amdgpu_va_handle va_handle;
|
||||
int r;
|
||||
|
|
@ -537,23 +373,6 @@ radv_amdgpu_winsys_virtual_bo_create(struct radeon_winsys *_ws, uint64_t size, u
|
|||
bo->va_handle = va_handle;
|
||||
bo->base.is_virtual = true;
|
||||
|
||||
ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
|
||||
if (!ranges) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto error_ranges_alloc;
|
||||
}
|
||||
|
||||
u_rwlock_init(&bo->lock);
|
||||
|
||||
bo->ranges = ranges;
|
||||
bo->range_count = 1;
|
||||
bo->range_capacity = 1;
|
||||
|
||||
bo->ranges[0].offset = 0;
|
||||
bo->ranges[0].size = size;
|
||||
bo->ranges[0].bo = NULL;
|
||||
bo->ranges[0].bo_offset = 0;
|
||||
|
||||
/* Reserve a PRT VA region. */
|
||||
r = radv_amdgpu_virtual_bo_init_mapping(ws, bo, va_size);
|
||||
if (r) {
|
||||
|
|
|
|||
|
|
@ -25,40 +25,16 @@ struct radv_amdgpu_winsys_bo_log {
|
|||
uint8_t virtual_mapping : 1;
|
||||
};
|
||||
|
||||
struct radv_amdgpu_map_range {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
struct radv_amdgpu_winsys_bo *bo;
|
||||
uint64_t bo_offset;
|
||||
};
|
||||
|
||||
struct radv_amdgpu_winsys_bo {
|
||||
struct radeon_winsys_bo base;
|
||||
amdgpu_va_handle va_handle;
|
||||
uint32_t flags;
|
||||
uint8_t priority;
|
||||
|
||||
union {
|
||||
/* physical bo */
|
||||
struct {
|
||||
ac_drm_bo bo;
|
||||
uint32_t bo_handle;
|
||||
ac_drm_bo bo;
|
||||
uint32_t bo_handle;
|
||||
|
||||
void *cpu_map;
|
||||
};
|
||||
/* virtual bo */
|
||||
struct {
|
||||
struct u_rwlock lock;
|
||||
|
||||
struct radv_amdgpu_map_range *ranges;
|
||||
uint32_t range_count;
|
||||
uint32_t range_capacity;
|
||||
|
||||
struct radv_amdgpu_winsys_bo **bos;
|
||||
uint32_t bo_count;
|
||||
uint32_t bo_capacity;
|
||||
};
|
||||
};
|
||||
void *cpu_map;
|
||||
};
|
||||
|
||||
static inline struct radv_amdgpu_winsys_bo *
|
||||
|
|
|
|||
|
|
@ -40,8 +40,6 @@
|
|||
/* Maximum allowed total number of submitted IBs. */
|
||||
#define RADV_MAX_IBS_PER_SUBMIT 192
|
||||
|
||||
enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 };
|
||||
|
||||
struct radv_amdgpu_ib {
|
||||
struct radeon_winsys_bo *bo; /* NULL when not owned by the current CS object */
|
||||
uint64_t va;
|
||||
|
|
@ -79,11 +77,6 @@ struct radv_amdgpu_cs {
|
|||
int buffer_hash_table[1024];
|
||||
unsigned hw_ip;
|
||||
|
||||
unsigned num_virtual_buffers;
|
||||
unsigned max_num_virtual_buffers;
|
||||
struct radeon_winsys_bo **virtual_buffers;
|
||||
int *virtual_buffer_hash_table;
|
||||
|
||||
struct hash_table *annotations;
|
||||
};
|
||||
|
||||
|
|
@ -193,8 +186,6 @@ radv_amdgpu_cs_destroy(struct ac_cmdbuf *rcs)
|
|||
cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffers[i].bo);
|
||||
|
||||
free(cs->ib_buffers);
|
||||
free(cs->virtual_buffers);
|
||||
free(cs->virtual_buffer_hash_table);
|
||||
free(cs->handles);
|
||||
free(cs);
|
||||
}
|
||||
|
|
@ -533,13 +524,7 @@ radv_amdgpu_cs_reset(struct ac_cmdbuf *_cs)
|
|||
cs->buffer_hash_table[hash] = -1;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
|
||||
unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
|
||||
cs->virtual_buffer_hash_table[hash] = -1;
|
||||
}
|
||||
|
||||
cs->num_buffers = 0;
|
||||
cs->num_virtual_buffers = 0;
|
||||
|
||||
/* When the CS is finalized and IBs are not allowed, use last IB. */
|
||||
assert(cs->ib_buffer || cs->num_ib_buffers);
|
||||
|
|
@ -661,55 +646,6 @@ radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, uint32_t bo, uint8
|
|||
++cs->num_buffers;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_amdgpu_cs_add_virtual_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *bo)
|
||||
{
|
||||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
|
||||
unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
|
||||
|
||||
if (!cs->virtual_buffer_hash_table) {
|
||||
int *virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
|
||||
if (!virtual_buffer_hash_table) {
|
||||
cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
return;
|
||||
}
|
||||
cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
|
||||
|
||||
for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
|
||||
cs->virtual_buffer_hash_table[i] = -1;
|
||||
}
|
||||
|
||||
if (cs->virtual_buffer_hash_table[hash] >= 0) {
|
||||
int idx = cs->virtual_buffer_hash_table[hash];
|
||||
if (cs->virtual_buffers[idx] == bo) {
|
||||
return;
|
||||
}
|
||||
for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
|
||||
if (cs->virtual_buffers[i] == bo) {
|
||||
cs->virtual_buffer_hash_table[hash] = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
|
||||
unsigned max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
|
||||
struct radeon_winsys_bo **virtual_buffers =
|
||||
realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo *) * max_num_virtual_buffers);
|
||||
if (!virtual_buffers) {
|
||||
cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
return;
|
||||
}
|
||||
cs->max_num_virtual_buffers = max_num_virtual_buffers;
|
||||
cs->virtual_buffers = virtual_buffers;
|
||||
}
|
||||
|
||||
cs->virtual_buffers[cs->num_virtual_buffers] = bo;
|
||||
|
||||
cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
|
||||
++cs->num_virtual_buffers;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
|
||||
{
|
||||
|
|
@ -719,10 +655,8 @@ radv_amdgpu_cs_add_buffer(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
|
|||
if (cs->status != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
if (bo->base.is_virtual) {
|
||||
radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
|
||||
if (bo->base.is_virtual)
|
||||
return;
|
||||
}
|
||||
|
||||
radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
|
||||
}
|
||||
|
|
@ -776,10 +710,6 @@ radv_amdgpu_cs_execute_secondary(struct ac_cmdbuf *_parent, struct ac_cmdbuf *_c
|
|||
radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle, child->handles[i].bo_priority);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
|
||||
radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
|
||||
}
|
||||
|
||||
if (use_ib2) {
|
||||
radv_amdgpu_cs_emit_secondary_ib2(parent, child);
|
||||
} else {
|
||||
|
|
@ -911,8 +841,6 @@ radv_amdgpu_count_cs_bo(struct radv_amdgpu_cs *start_cs)
|
|||
|
||||
for (struct radv_amdgpu_cs *cs = start_cs; cs; cs = cs->chained_to) {
|
||||
num_bo += cs->num_buffers;
|
||||
for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
|
||||
num_bo += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
|
||||
}
|
||||
|
||||
return num_bo;
|
||||
|
|
@ -936,7 +864,7 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
|
|||
if (!cs->num_buffers)
|
||||
return num_handles;
|
||||
|
||||
if (num_handles == 0 && !cs->num_virtual_buffers) {
|
||||
if (num_handles == 0) {
|
||||
memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
|
||||
return cs->num_buffers;
|
||||
}
|
||||
|
|
@ -955,26 +883,6 @@ radv_amdgpu_add_cs_to_bo_list(struct radv_amdgpu_cs *cs, struct drm_amdgpu_bo_li
|
|||
++num_handles;
|
||||
}
|
||||
}
|
||||
for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
|
||||
struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
|
||||
u_rwlock_rdlock(&virtual_bo->lock);
|
||||
for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
|
||||
struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
|
||||
bool found = false;
|
||||
for (unsigned m = 0; m < num_handles; ++m) {
|
||||
if (handles[m].bo_handle == bo->bo_handle) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
handles[num_handles].bo_handle = bo->bo_handle;
|
||||
handles[num_handles].bo_priority = bo->priority;
|
||||
++num_handles;
|
||||
}
|
||||
}
|
||||
u_rwlock_rdunlock(&virtual_bo->lock);
|
||||
}
|
||||
|
||||
return num_handles;
|
||||
}
|
||||
|
|
@ -1020,8 +928,7 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct ac_cmdbuf **cs_arr
|
|||
|
||||
num_handles = radv_amdgpu_copy_global_bo_list(ws, handles);
|
||||
} else if (count == 1 && !num_initial_preambles && !num_continue_preambles && !num_postambles &&
|
||||
!radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !radv_amdgpu_cs(cs_array[0])->chained_to &&
|
||||
!ws->global_bo_list.count) {
|
||||
!radv_amdgpu_cs(cs_array[0])->chained_to && !ws->global_bo_list.count) {
|
||||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
|
||||
if (cs->num_buffers == 0)
|
||||
return VK_SUCCESS;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue