From c4ba003e2fd97b4dc3771d01fe15fb02fd6ce98f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Oct 2021 03:24:02 -0400 Subject: [PATCH] winsys/amdgpu: move BO fence array updates to the CS thread We always wait for num_active_ioctls == 0 before we use the fence, so we can just add fences to BOs in the CS thread. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 44 ++++++++++++----------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index e65348bbbe3..b6f231d2c98 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1224,9 +1224,9 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws, } static void amdgpu_add_bo_fence_dependencies(struct amdgpu_cs *acs, + struct amdgpu_cs_context *cs, struct amdgpu_cs_buffer *buffer) { - struct amdgpu_cs_context *cs = acs->csc; struct amdgpu_winsys_bo *bo = buffer->bo; unsigned new_num_fences = 0; @@ -1294,7 +1294,15 @@ void amdgpu_add_fences(struct amdgpu_winsys_bo *bo, } } +static void amdgpu_inc_bo_num_active_ioctls(unsigned num_buffers, + struct amdgpu_cs_buffer *buffers) +{ + for (unsigned i = 0; i < num_buffers; i++) + p_atomic_inc(&buffers[i].bo->num_active_ioctls); +} + static void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs, + struct amdgpu_cs_context *cs, struct pipe_fence_handle *fence, unsigned num_buffers, struct amdgpu_cs_buffer *buffers) @@ -1303,8 +1311,7 @@ static void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs, struct amdgpu_cs_buffer *buffer = &buffers[i]; struct amdgpu_winsys_bo *bo = buffer->bo; - amdgpu_add_bo_fence_dependencies(acs, buffer); - p_atomic_inc(&bo->num_active_ioctls); + amdgpu_add_bo_fence_dependencies(acs, cs, buffer); amdgpu_add_fences(bo, 1, &fence); } } @@ -1312,13 +1319,12 @@ static void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs, /* Since the kernel driver doesn't synchronize execution between different * rings automatically, we have to add fence dependencies manually. */ -static void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs) +static void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs, + struct amdgpu_cs_context *cs) { - struct amdgpu_cs_context *cs = acs->csc; - - amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_real_buffers, cs->real_buffers); - amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_slab_buffers, cs->slab_buffers); - amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers); + amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_real_buffers, cs->real_buffers); + amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_slab_buffers, cs->slab_buffers); + amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers); } static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws, @@ -1378,6 +1384,10 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) struct drm_amdgpu_bo_list_in bo_list_in; unsigned initial_num_real_buffers = cs->num_real_buffers; + simple_mtx_lock(&ws->bo_fence_lock); + amdgpu_add_fence_dependencies_bo_lists(acs, cs); + simple_mtx_unlock(&ws->bo_fence_lock); + #if DEBUG /* Prepare the buffer list. */ if (ws->debug_all_bos) { @@ -1713,16 +1723,11 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, if (fence) amdgpu_fence_reference(fence, cur->fence); - amdgpu_cs_sync_flush(rcs); + amdgpu_inc_bo_num_active_ioctls(cur->num_real_buffers, cur->real_buffers); + amdgpu_inc_bo_num_active_ioctls(cur->num_slab_buffers, cur->slab_buffers); + amdgpu_inc_bo_num_active_ioctls(cur->num_sparse_buffers, cur->sparse_buffers); - /* Prepare buffers. - * - * This fence must be held until the submission is queued to ensure - * that the order of fence dependency updates matches the order of - * submissions. - */ - simple_mtx_lock(&ws->bo_fence_lock); - amdgpu_add_fence_dependencies_bo_lists(cs); + amdgpu_cs_sync_flush(rcs); /* Swap command streams. "cst" is going to be submitted. */ rcs->csc = cs->csc = cs->cst; @@ -1737,9 +1742,6 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, else cs->csc->secure = cs->cst->secure; - /* The submission has been queued, unlock the fence now. */ - simple_mtx_unlock(&ws->bo_fence_lock); - if (!(flags & PIPE_FLUSH_ASYNC)) { amdgpu_cs_sync_flush(rcs); error_code = cur->error_code;