radv: use the global BO list from the winsys

We had two different implementations of the global BO list, one in RADV
and one in the winsys. This will also allow to make more BOs resident.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8868>
This commit is contained in:
Samuel Pitoiset 2021-02-04 18:21:19 +01:00 committed by Marge Bot
parent f6c28474a3
commit 96b03aaa17
5 changed files with 17 additions and 147 deletions

View file

@ -2437,77 +2437,6 @@ radv_queue_finish(struct radv_queue *queue)
vk_object_base_finish(&queue->base); vk_object_base_finish(&queue->base);
} }
/* Set up an empty global BO list guarded by a reader/writer lock. */
static void
radv_bo_list_init(struct radv_bo_list *bo_list)
{
	bo_list->list.bos = NULL;
	bo_list->list.count = 0;
	bo_list->capacity = 0;
	u_rwlock_init(&bo_list->rwlock);
}
/* Tear down the global BO list: drop the backing array and its lock. */
static void
radv_bo_list_finish(struct radv_bo_list *bo_list)
{
	u_rwlock_destroy(&bo_list->rwlock);
	free(bo_list->list.bos);
}
/* Track a BO in the device-global BO list so it is referenced by every
 * submission. Local BOs and devices that do not use the global list are
 * skipped. Returns VK_ERROR_OUT_OF_HOST_MEMORY if growing the array fails.
 */
VkResult radv_bo_list_add(struct radv_device *device,
			  struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;
	VkResult result = VK_SUCCESS;

	if (bo->is_local || unlikely(!device->use_global_bo_list))
		return VK_SUCCESS;

	u_rwlock_wrlock(&bo_list->rwlock);

	/* Grow the array geometrically when it is full (doubling, min 4). */
	if (bo_list->list.count == bo_list->capacity) {
		unsigned new_capacity = MAX2(4, bo_list->capacity * 2);
		struct radeon_winsys_bo **bos =
			realloc(bo_list->list.bos, new_capacity * sizeof(*bos));

		if (!bos) {
			/* Old array is still valid on realloc failure. */
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto unlock;
		}

		bo_list->list.bos = bos;
		bo_list->capacity = new_capacity;
	}

	bo_list->list.bos[bo_list->list.count++] = bo;
	bo->use_global_list = true;

unlock:
	u_rwlock_wrunlock(&bo_list->rwlock);
	return result;
}
/* Stop tracking a BO in the device-global BO list. No-op for local BOs
 * and for devices that do not use the global list.
 */
void radv_bo_list_remove(struct radv_device *device,
			 struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;

	if (bo->is_local || unlikely(!device->use_global_bo_list))
		return;

	u_rwlock_wrlock(&bo_list->rwlock);

	/* Walk backwards so the most recently added memory is found first,
	 * then swap the match with the last entry to shrink the array. */
	for (unsigned i = bo_list->list.count; i-- > 0;) {
		if (bo_list->list.bos[i] != bo)
			continue;

		bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
		bo->use_global_list = false;
		bo_list->list.count--;
		break;
	}

	u_rwlock_wrunlock(&bo_list->rwlock);
}
static void static void
radv_device_init_gs_info(struct radv_device *device) radv_device_init_gs_info(struct radv_device *device)
{ {
@ -2739,8 +2668,6 @@ VkResult radv_CreateDevice(
device->overallocation_disallowed = overallocation_disallowed; device->overallocation_disallowed = overallocation_disallowed;
mtx_init(&device->overallocation_mutex, mtx_plain); mtx_init(&device->overallocation_mutex, mtx_plain);
radv_bo_list_init(&device->bo_list);
/* Create one context per queue priority. */ /* Create one context per queue priority. */
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
@ -2968,8 +2895,6 @@ fail_mem_cache:
fail_meta: fail_meta:
radv_device_finish_meta(device); radv_device_finish_meta(device);
fail: fail:
radv_bo_list_finish(&device->bo_list);
radv_thread_trace_finish(device); radv_thread_trace_finish(device);
free(device->thread_trace.trigger_file); free(device->thread_trace.trigger_file);
@ -3041,7 +2966,6 @@ void radv_DestroyDevice(
radv_destroy_shader_slabs(device); radv_destroy_shader_slabs(device);
u_cnd_monotonic_destroy(&device->timeline_cond); u_cnd_monotonic_destroy(&device->timeline_cond);
radv_bo_list_finish(&device->bo_list);
free(device->thread_trace.trigger_file); free(device->thread_trace.trigger_file);
radv_thread_trace_finish(device); radv_thread_trace_finish(device);
@ -4707,7 +4631,7 @@ radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
result = queue->device->ws->cs_submit(ctx, queue->queue_idx, result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index], &queue->device->empty_cs[queue->queue_family_index],
1, NULL, NULL, 1, NULL, NULL,
&sem_info, NULL, &sem_info,
false, base_fence); false, base_fence);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
@ -4728,8 +4652,6 @@ radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) { for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs; struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
const struct radv_winsys_bo_list *bo_list = NULL;
advance = MIN2(max_cs_submission, advance = MIN2(max_cs_submission,
submission->cmd_buffer_count - j); submission->cmd_buffer_count - j);
@ -4739,19 +4661,10 @@ radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
sem_info.cs_emit_wait = j == 0; sem_info.cs_emit_wait = j == 0;
sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count; sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
if (unlikely(queue->device->use_global_bo_list)) {
u_rwlock_rdlock(&queue->device->bo_list.rwlock);
bo_list = &queue->device->bo_list.list;
}
result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
advance, initial_preamble, continue_preamble_cs, advance, initial_preamble, continue_preamble_cs,
&sem_info, bo_list, &sem_info,
can_patch, base_fence); can_patch, base_fence);
if (unlikely(queue->device->use_global_bo_list))
u_rwlock_rdunlock(&queue->device->bo_list.rwlock);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
@ -4979,7 +4892,7 @@ radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
return false; return false;
result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1,
NULL, NULL, &sem_info, NULL, NULL, NULL, &sem_info,
false, NULL); false, NULL);
radv_free_sem_info(&sem_info); radv_free_sem_info(&sem_info);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
@ -5232,7 +5145,8 @@ radv_free_memory(struct radv_device *device,
mtx_unlock(&device->overallocation_mutex); mtx_unlock(&device->overallocation_mutex);
} }
radv_bo_list_remove(device, mem->bo); if (device->use_global_bo_list)
device->ws->buffer_make_resident(device->ws, mem->bo, false);
device->ws->buffer_destroy(device->ws, mem->bo); device->ws->buffer_destroy(device->ws, mem->bo);
mem->bo = NULL; mem->bo = NULL;
} }
@ -5413,9 +5327,11 @@ static VkResult radv_alloc_memory(struct radv_device *device,
} }
if (!wsi_info) { if (!wsi_info) {
result = radv_bo_list_add(device, mem->bo); if (device->use_global_bo_list) {
if (result != VK_SUCCESS) result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
goto fail; if (result != VK_SUCCESS)
goto fail;
}
} }
*pMem = radv_device_memory_to_handle(mem); *pMem = radv_device_memory_to_handle(mem);

View file

@ -713,17 +713,6 @@ struct radv_queue {
bool cond_created; bool cond_created;
}; };
struct radv_bo_list {
struct radv_winsys_bo_list list;
unsigned capacity;
struct u_rwlock rwlock;
};
VkResult radv_bo_list_add(struct radv_device *device,
struct radeon_winsys_bo *bo);
void radv_bo_list_remove(struct radv_device *device,
struct radeon_winsys_bo *bo);
#define RADV_BORDER_COLOR_COUNT 4096 #define RADV_BORDER_COLOR_COUNT 4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT) #define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
@ -807,8 +796,6 @@ struct radv_device {
/* Whether the driver uses a global BO list. */ /* Whether the driver uses a global BO list. */
bool use_global_bo_list; bool use_global_bo_list;
struct radv_bo_list bo_list;
/* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */ /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
int force_aniso; int force_aniso;

View file

@ -292,7 +292,6 @@ struct radeon_winsys {
struct radeon_cmdbuf *initial_preamble_cs, struct radeon_cmdbuf *initial_preamble_cs,
struct radeon_cmdbuf *continue_preamble_cs, struct radeon_cmdbuf *continue_preamble_cs,
struct radv_winsys_sem_info *sem_info, struct radv_winsys_sem_info *sem_info,
const struct radv_winsys_bo_list *bo_list, /* optional */
bool can_patch, bool can_patch,
struct radeon_winsys_fence *fence); struct radeon_winsys_fence *fence);

View file

@ -44,10 +44,7 @@ radv_wsi_set_memory_ownership(VkDevice _device,
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_device_memory, mem, _mem); RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
if (ownership) device->ws->buffer_make_resident(device->ws, mem->bo, ownership);
radv_bo_list_add(device, mem->bo);
else
radv_bo_list_remove(device, mem->bo);
} }
VkResult VkResult

View file

@ -693,7 +693,6 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws,
struct radv_amdgpu_winsys_bo **extra_bo_array, struct radv_amdgpu_winsys_bo **extra_bo_array,
unsigned num_extra_bo, unsigned num_extra_bo,
struct radeon_cmdbuf *extra_cs, struct radeon_cmdbuf *extra_cs,
const struct radv_winsys_bo_list *radv_bo_list,
unsigned *rnum_handles, unsigned *rnum_handles,
struct drm_amdgpu_bo_list_entry **rhandles) struct drm_amdgpu_bo_list_entry **rhandles)
{ {
@ -711,7 +710,7 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws,
handles[i].bo_priority = ws->global_bo_list.bos[i]->priority; handles[i].bo_priority = ws->global_bo_list.bos[i]->priority;
num_handles++; num_handles++;
} }
} else if (count == 1 && !num_extra_bo && !extra_cs && !radv_bo_list && } else if (count == 1 && !num_extra_bo && !extra_cs &&
!radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers &&
!ws->global_bo_list.count) { !ws->global_bo_list.count) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0]; struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
@ -739,10 +738,6 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws,
total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers; total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
} }
if (radv_bo_list) {
total_buffer_count += radv_bo_list->count;
}
total_buffer_count += ws->global_bo_list.count; total_buffer_count += ws->global_bo_list.count;
if (total_buffer_count == 0) if (total_buffer_count == 0)
@ -807,25 +802,6 @@ radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws,
} }
} }
if (radv_bo_list) {
unsigned unique_bo_so_far = num_handles;
for (unsigned i = 0; i < radv_bo_list->count; ++i) {
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(radv_bo_list->bos[i]);
bool found = false;
for (unsigned j = 0; j < unique_bo_so_far; ++j) {
if (bo->bo_handle == handles[j].bo_handle) {
found = true;
break;
}
}
if (!found) {
handles[num_handles].bo_handle = bo->bo_handle;
handles[num_handles].bo_priority = bo->priority;
++num_handles;
}
}
}
unsigned unique_bo_so_far = num_handles; unsigned unique_bo_so_far = num_handles;
for (unsigned i = 0; i < ws->global_bo_list.count; ++i) { for (unsigned i = 0; i < ws->global_bo_list.count; ++i) {
struct radv_amdgpu_winsys_bo *bo = ws->global_bo_list.bos[i]; struct radv_amdgpu_winsys_bo *bo = ws->global_bo_list.bos[i];
@ -862,7 +838,6 @@ static VkResult
radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
int queue_idx, int queue_idx,
struct radv_winsys_sem_info *sem_info, struct radv_winsys_sem_info *sem_info,
const struct radv_winsys_bo_list *radv_bo_list,
struct radeon_cmdbuf **cs_array, struct radeon_cmdbuf **cs_array,
unsigned cs_count, unsigned cs_count,
struct radeon_cmdbuf *initial_preamble_cs, struct radeon_cmdbuf *initial_preamble_cs,
@ -905,7 +880,7 @@ radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
/* Get the BO list. */ /* Get the BO list. */
result = radv_amdgpu_get_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, result = radv_amdgpu_get_bo_list(cs0->ws, cs_array, cs_count, NULL, 0,
initial_preamble_cs, radv_bo_list, initial_preamble_cs,
&num_handles, &handles); &num_handles, &handles);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
@ -949,7 +924,6 @@ static VkResult
radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
int queue_idx, int queue_idx,
struct radv_winsys_sem_info *sem_info, struct radv_winsys_sem_info *sem_info,
const struct radv_winsys_bo_list *radv_bo_list,
struct radeon_cmdbuf **cs_array, struct radeon_cmdbuf **cs_array,
unsigned cs_count, unsigned cs_count,
struct radeon_cmdbuf *initial_preamble_cs, struct radeon_cmdbuf *initial_preamble_cs,
@ -977,7 +951,7 @@ radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
/* Get the BO list. */ /* Get the BO list. */
result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0, result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0,
initial_preamble_cs, radv_bo_list, initial_preamble_cs,
&num_handles, &handles); &num_handles, &handles);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
goto fail; goto fail;
@ -1036,7 +1010,6 @@ static VkResult
radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
int queue_idx, int queue_idx,
struct radv_winsys_sem_info *sem_info, struct radv_winsys_sem_info *sem_info,
const struct radv_winsys_bo_list *radv_bo_list,
struct radeon_cmdbuf **cs_array, struct radeon_cmdbuf **cs_array,
unsigned cs_count, unsigned cs_count,
struct radeon_cmdbuf *initial_preamble_cs, struct radeon_cmdbuf *initial_preamble_cs,
@ -1191,7 +1164,6 @@ radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[i], cnt, result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[i], cnt,
(struct radv_amdgpu_winsys_bo **)bos, (struct radv_amdgpu_winsys_bo **)bos,
number_of_ibs, preamble_cs, number_of_ibs, preamble_cs,
radv_bo_list,
&num_handles, &handles); &num_handles, &handles);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
free(ibs); free(ibs);
@ -1241,7 +1213,6 @@ static VkResult radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
struct radeon_cmdbuf *initial_preamble_cs, struct radeon_cmdbuf *initial_preamble_cs,
struct radeon_cmdbuf *continue_preamble_cs, struct radeon_cmdbuf *continue_preamble_cs,
struct radv_winsys_sem_info *sem_info, struct radv_winsys_sem_info *sem_info,
const struct radv_winsys_bo_list *bo_list,
bool can_patch, bool can_patch,
struct radeon_winsys_fence *_fence) struct radeon_winsys_fence *_fence)
{ {
@ -1251,13 +1222,13 @@ static VkResult radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
assert(sem_info); assert(sem_info);
if (!cs->ws->use_ib_bos) { if (!cs->ws->use_ib_bos) {
result = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array, result = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence); cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else if (can_patch) { } else if (can_patch) {
result = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array, result = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array,
cs_count, initial_preamble_cs, _fence); cs_count, initial_preamble_cs, _fence);
} else { } else {
result = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, bo_list, cs_array, result = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array,
cs_count, initial_preamble_cs, _fence); cs_count, initial_preamble_cs, _fence);
} }