diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index dd8b2e4b73a..b8ddb89e2d8 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3762,7 +3762,7 @@ static VkResult radv_alloc_sem_counts(struct radv_device *device,
                                       VkFence _fence,
                                       bool is_signal)
 {
-   int syncobj_idx = 0, sem_idx = 0;
+   int syncobj_idx = 0, non_reset_idx = 0, sem_idx = 0;
 
    if (num_sems == 0 && _fence == VK_NULL_HANDLE)
       return VK_SUCCESS;
@@ -3771,6 +3771,7 @@ static VkResult radv_alloc_sem_counts(struct radv_device *device,
       switch(sems[i]->kind) {
       case RADV_SEMAPHORE_SYNCOBJ:
          counts->syncobj_count++;
+         counts->syncobj_reset_count++;
          break;
       case RADV_SEMAPHORE_WINSYS:
          counts->sem_count++;
         break;
@@ -3807,6 +3808,8 @@ static VkResult radv_alloc_sem_counts(struct radv_device *device,
       }
    }
 
+   non_reset_idx = counts->syncobj_reset_count;
+
    for (uint32_t i = 0; i < num_sems; i++) {
       switch(sems[i]->kind) {
       case RADV_SEMAPHORE_NONE:
@@ -3830,7 +3833,7 @@ static VkResult radv_alloc_sem_counts(struct radv_device *device,
          pthread_mutex_unlock(&sems[i]->timeline.mutex);
 
          if (point) {
-            counts->syncobj[syncobj_idx++] = point->syncobj;
+            counts->syncobj[non_reset_idx++] = point->syncobj;
          } else {
             /* Explicitly remove the semaphore so we might not find
              * a point later post-submit. */
@@ -3848,11 +3851,11 @@ static VkResult radv_alloc_sem_counts(struct radv_device *device,
         fence->temporary.kind != RADV_FENCE_NONE ?
         &fence->temporary : &fence->permanent;
      if (part->kind == RADV_FENCE_SYNCOBJ)
-        counts->syncobj[syncobj_idx++] = part->syncobj;
+        counts->syncobj[non_reset_idx++] = part->syncobj;
    }
 
-   assert(syncobj_idx <= counts->syncobj_count);
-   counts->syncobj_count = syncobj_idx;
+   assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
+   counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
 
    return VK_SUCCESS;
 }
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index ac5555e957d..37576327dfd 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -168,6 +168,7 @@ struct radeon_winsys_bo {
 };
 struct radv_winsys_sem_counts {
    uint32_t syncobj_count;
+   uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
    uint32_t sem_count;
    uint32_t *syncobj;
    struct radeon_winsys_sem **sem;
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index b024ef97104..2cce43c2460 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -1451,15 +1451,17 @@ static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
 }
 
 static struct drm_amdgpu_cs_chunk_sem *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
+                                                                          const uint32_t *syncobj_override,
                                                                           struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
 {
+   const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
    struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
    if (!syncobj)
       return NULL;
 
    for (unsigned i = 0; i < counts->syncobj_count; i++) {
       struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i];
-      sem->handle = counts->syncobj[i];
+      sem->handle = src[i];
    }
 
    chunk->chunk_id = chunk_id;
@@ -1468,6 +1470,101 @@ static struct drm_amdgpu_cs_chunk_sem *radv_amdgpu_cs_alloc_syncobj_chunk(struct
    return syncobj;
 }
 
+static int radv_amdgpu_cache_alloc_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *dst)
+{
+   pthread_mutex_lock(&ws->syncobj_lock);
+   if (count > ws->syncobj_capacity) {
+      if (ws->syncobj_capacity > UINT32_MAX / 2)
+         goto fail;
+
+      unsigned new_capacity = MAX2(count, ws->syncobj_capacity * 2);
+      uint32_t *n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
+      if (!n)
+         goto fail;
+      ws->syncobj_capacity = new_capacity;
+      ws->syncobj = n;
+   }
+
+   while(ws->syncobj_count < count) {
+      int r = amdgpu_cs_create_syncobj(ws->dev, ws->syncobj + ws->syncobj_count);
+      if (r)
+         goto fail;
+      ++ws->syncobj_count;
+   }
+
+   for (unsigned i = 0; i < count; ++i)
+      dst[i] = ws->syncobj[--ws->syncobj_count];
+
+   pthread_mutex_unlock(&ws->syncobj_lock);
+   return 0;
+
+fail:
+   pthread_mutex_unlock(&ws->syncobj_lock);
+   return -ENOMEM;
+}
+
+static void radv_amdgpu_cache_free_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *src)
+{
+   pthread_mutex_lock(&ws->syncobj_lock);
+
+   uint32_t cache_count = MIN2(count, UINT32_MAX - ws->syncobj_count);
+   if (cache_count + ws->syncobj_count > ws->syncobj_capacity) {
+      unsigned new_capacity = MAX2(ws->syncobj_count + cache_count, ws->syncobj_capacity * 2);
+      uint32_t* n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
+      if (n) {
+         ws->syncobj_capacity = new_capacity;
+         ws->syncobj = n;
+      }
+   }
+
+   for (unsigned i = 0; i < count; ++i) {
+      if (ws->syncobj_count < ws->syncobj_capacity)
+         ws->syncobj[ws->syncobj_count++] = src[i];
+      else
+         amdgpu_cs_destroy_syncobj(ws->dev, src[i]);
+   }
+
+   pthread_mutex_unlock(&ws->syncobj_lock);
+
+}
+
+static int radv_amdgpu_cs_prepare_syncobjs(struct radv_amdgpu_winsys *ws,
+                                           struct radv_winsys_sem_counts *counts,
+                                           uint32_t **out_syncobjs)
+{
+   int r = 0;
+
+   if (!ws->info.has_timeline_syncobj || !counts->syncobj_count) {
+      *out_syncobjs = NULL;
+      return 0;
+   }
+
+   *out_syncobjs = malloc(counts->syncobj_count * sizeof(**out_syncobjs));
+   if (!*out_syncobjs)
+      return -ENOMEM;
+
+   r = radv_amdgpu_cache_alloc_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
+   if (r)
+      return r;
+
+   for (unsigned i = 0; i < counts->syncobj_count; ++i) {
+      r = amdgpu_cs_syncobj_transfer(ws->dev, (*out_syncobjs)[i], 0, counts->syncobj[i], 0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
+      if (r)
+         goto fail;
+   }
+
+   r = amdgpu_cs_syncobj_reset(ws->dev, counts->syncobj, counts->syncobj_reset_count);
+   if (r)
+      goto fail;
+
+   return 0;
+fail:
+   radv_amdgpu_cache_free_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
+   free(*out_syncobjs);
+   *out_syncobjs = NULL;
+   return r;
+}
+
 static VkResult
 radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
                       struct radv_amdgpu_cs_request *request,
@@ -1483,6 +1580,7 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
    struct drm_amdgpu_cs_chunk_sem *wait_syncobj = NULL, *signal_syncobj = NULL;
    bool use_bo_list_create = ctx->ws->info.drm_minor < 27;
    struct drm_amdgpu_bo_list_in bo_list_in;
+   uint32_t *in_syncobjs = NULL;
    int i;
    struct amdgpu_cs_fence *sem;
    uint32_t bo_list = 0;
@@ -1533,7 +1631,12 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
    }
 
    if (sem_info->wait.syncobj_count && sem_info->cs_emit_wait) {
+      r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
+      if (r)
+         goto error_out;
+
       wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait,
+                                                        in_syncobjs,
                                                         &chunks[num_chunks],
                                                         AMDGPU_CHUNK_ID_SYNCOBJ_IN);
       if (!wait_syncobj) {
@@ -1578,6 +1681,7 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
 
    if (sem_info->signal.syncobj_count && sem_info->cs_emit_signal) {
       signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal,
+                                                          NULL,
                                                           &chunks[num_chunks],
                                                           AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
       if (!signal_syncobj) {
@@ -1642,6 +1746,10 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
    }
 
 error_out:
+   if (in_syncobjs) {
+      radv_amdgpu_cache_free_syncobjs(ctx->ws, sem_info->wait.syncobj_count, in_syncobjs);
+      free(in_syncobjs);
+   }
    free(chunks);
    free(chunk_data);
    free(sem_dependencies);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
index 29e7b20b886..c6deedcb32c 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -162,6 +162,10 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
 {
    struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
 
+   for (unsigned i = 0; i < ws->syncobj_count; ++i)
+      amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
+   free(ws->syncobj);
+
    ac_addrlib_destroy(ws->addrlib);
    amdgpu_device_deinitialize(ws->dev);
    FREE(rws);
@@ -197,6 +201,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
    ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
    list_inithead(&ws->global_bo_list);
    pthread_mutex_init(&ws->global_bo_list_lock, NULL);
+   pthread_mutex_init(&ws->syncobj_lock, NULL);
    ws->base.query_info = radv_amdgpu_winsys_query_info;
    ws->base.query_value = radv_amdgpu_winsys_query_value;
    ws->base.read_registers = radv_amdgpu_winsys_read_registers;
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
index 0601481625c..bb8ce8863ba 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
@@ -55,6 +55,11 @@ struct radv_amdgpu_winsys {
    uint64_t allocated_vram;
    uint64_t allocated_vram_vis;
    uint64_t allocated_gtt;
+
+   /* syncobj cache */
+   pthread_mutex_t syncobj_lock;
+   uint32_t *syncobj;
+   uint32_t syncobj_count, syncobj_capacity;
 };
 
 static inline struct radv_amdgpu_winsys *