From 03e5a63058509229ff0d84b08460b38f9f072294 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Mon, 30 Jun 2025 15:49:35 -0400 Subject: [PATCH] zink: fix signaling multiple API semaphores it's possible for multiple user semaphores to be signaled in one batch, and these all have the same mechanics as wait semaphores, which means they unfortunately need their own submit in order to preserve ownership when resetting the batch state Part-of: --- src/gallium/drivers/zink/zink_batch.c | 36 +++++++++++++++++---------- src/gallium/drivers/zink/zink_fence.c | 9 ++++--- src/gallium/drivers/zink/zink_types.h | 3 ++- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index e2d56addf7e..3a2bd2895d6 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -128,6 +128,7 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs) bs->sparse_semaphore = VK_NULL_HANDLE; util_dynarray_clear(&bs->wait_semaphore_stages); util_dynarray_clear(&bs->wait_semaphores); + util_dynarray_clear(&bs->user_signal_semaphores); bs->present = VK_NULL_HANDLE; /* check the arrays first to avoid locking unnecessarily */ @@ -261,6 +262,7 @@ zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs util_dynarray_fini(&bs->bindless_releases[1]); util_dynarray_fini(&bs->acquires); util_dynarray_fini(&bs->signal_semaphores); + util_dynarray_fini(&bs->user_signal_semaphores); util_dynarray_fini(&bs->wait_semaphores); util_dynarray_fini(&bs->wait_semaphore_stages); util_dynarray_fini(&bs->fd_wait_semaphores); @@ -363,6 +365,7 @@ create_batch_state(struct zink_context *ctx) SET_CREATE_OR_FAIL(&bs->active_queries); SET_CREATE_OR_FAIL(&bs->dmabuf_exports); util_dynarray_init(&bs->signal_semaphores, NULL); + util_dynarray_init(&bs->user_signal_semaphores, NULL); util_dynarray_init(&bs->wait_semaphores, NULL); util_dynarray_init(&bs->tracked_semaphores, NULL); util_dynarray_init(&bs->fd_wait_semaphores, NULL); @@ -589,7 +592,8 @@ typedef enum { ZINK_SUBMIT_WAIT_ACQUIRE, ZINK_SUBMIT_WAIT_FD, ZINK_SUBMIT_CMDBUF, - ZINK_SUBMIT_SIGNAL, + ZINK_SUBMIT_SIGNAL_INTERNAL, + ZINK_SUBMIT_SIGNAL_USER, ZINK_SUBMIT_MAX } zink_submit; @@ -662,25 +666,34 @@ submit_queue(void *data, void *gdata, int thread_index) /* then the signal submit with the timeline (fence) semaphore */ VkSemaphore signals[ZINK_MAX_SIGNALS]; - si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount = !!bs->signal_semaphore; + si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount = !!bs->signal_semaphore; signals[0] = bs->signal_semaphore; - si[ZINK_SUBMIT_SIGNAL].pSignalSemaphores = signals; + si[ZINK_SUBMIT_SIGNAL_INTERNAL].pSignalSemaphores = signals; VkTimelineSemaphoreSubmitInfo tsi = {0}; uint64_t signal_values[ZINK_MAX_SIGNALS] = {0}; tsi.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - si[ZINK_SUBMIT_SIGNAL].pNext = &tsi; + si[ZINK_SUBMIT_SIGNAL_INTERNAL].pNext = &tsi; tsi.pSignalSemaphoreValues = signal_values; - signal_values[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount] = batch_id; - signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = screen->sem; - tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount; + signal_values[si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount] = batch_id; + signals[si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount++] = screen->sem; + tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount; if (bs->present) - signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = bs->present; - tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount; + signals[si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount++] = bs->present; + tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount; - assert(si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount <= ZINK_MAX_SIGNALS); + assert(si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount <= ZINK_MAX_SIGNALS); assert(tsi.signalSemaphoreValueCount <= ZINK_MAX_SIGNALS); + if (util_dynarray_num_elements(&bs->user_signal_semaphores, VkSemaphore)) { + si[ZINK_SUBMIT_SIGNAL_USER].signalSemaphoreCount = util_dynarray_num_elements(&bs->user_signal_semaphores, VkSemaphore); + si[ZINK_SUBMIT_SIGNAL_USER].pSignalSemaphores = bs->user_signal_semaphores.data; + } else { + num_si--; + if (!si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount) + num_si--; + } + VkResult result; if (bs->has_work) { VRAM_ALLOC_LOOP(result, @@ -724,9 +737,6 @@ submit_queue(void *data, void *gdata, int thread_index) ); } - if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount) - num_si--; - simple_mtx_lock(&screen->queue_lock); VRAM_ALLOC_LOOP(result, VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE), diff --git a/src/gallium/drivers/zink/zink_fence.c b/src/gallium/drivers/zink/zink_fence.c index d04001abb5c..af794be833e 100644 --- a/src/gallium/drivers/zink/zink_fence.c +++ b/src/gallium/drivers/zink/zink_fence.c @@ -236,11 +236,12 @@ zink_fence_server_signal(struct pipe_context *pctx, struct pipe_fence_handle *pf { struct zink_context *ctx = zink_context(pctx); struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence; - - assert(!ctx->bs->signal_semaphore); - ctx->bs->signal_semaphore = mfence->sem; - ctx->bs->has_work = true; struct zink_batch_state *bs = ctx->bs; + + util_dynarray_append(&ctx->bs->user_signal_semaphores, VkSemaphore, mfence->sem); + bs->has_work = true; + + /* this must produce a synchronous flush that completes before the function returns */ pctx->flush(pctx, NULL, 0); if (zink_screen(ctx->base.screen)->threaded_submit) diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 86d63a10cf8..20cf8288158 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -600,7 +600,8 @@ struct zink_batch_state { VkCommandPool unsynchronized_cmdpool; VkCommandBuffer unsynchronized_cmdbuf; VkSemaphore signal_semaphore; //external signal semaphore - struct util_dynarray signal_semaphores; //external signal semaphores + struct util_dynarray signal_semaphores; //internal signal semaphores + struct util_dynarray user_signal_semaphores; //api signal semaphores struct util_dynarray wait_semaphores; //external wait semaphores struct util_dynarray wait_semaphore_stages; //external wait semaphores struct util_dynarray fd_wait_semaphores; //dmabuf wait semaphores