zink: fix signaling multiple API semaphores

It's possible for multiple user (API) semaphores to be signaled in one batch.
These all have the same mechanics as wait semaphores, which means they
unfortunately need their own submit in order to preserve ownership when the
batch state is reset.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35866>
This commit is contained in:
Mike Blumenkrantz 2025-06-30 15:49:35 -04:00 committed by Marge Bot
parent d65c37f72e
commit 03e5a63058
3 changed files with 30 additions and 18 deletions

View file

@ -128,6 +128,7 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs)
bs->sparse_semaphore = VK_NULL_HANDLE;
util_dynarray_clear(&bs->wait_semaphore_stages);
util_dynarray_clear(&bs->wait_semaphores);
util_dynarray_clear(&bs->user_signal_semaphores);
bs->present = VK_NULL_HANDLE;
/* check the arrays first to avoid locking unnecessarily */
@ -261,6 +262,7 @@ zink_batch_state_destroy(struct zink_screen *screen, struct zink_batch_state *bs
util_dynarray_fini(&bs->bindless_releases[1]);
util_dynarray_fini(&bs->acquires);
util_dynarray_fini(&bs->signal_semaphores);
util_dynarray_fini(&bs->user_signal_semaphores);
util_dynarray_fini(&bs->wait_semaphores);
util_dynarray_fini(&bs->wait_semaphore_stages);
util_dynarray_fini(&bs->fd_wait_semaphores);
@ -363,6 +365,7 @@ create_batch_state(struct zink_context *ctx)
SET_CREATE_OR_FAIL(&bs->active_queries);
SET_CREATE_OR_FAIL(&bs->dmabuf_exports);
util_dynarray_init(&bs->signal_semaphores, NULL);
util_dynarray_init(&bs->user_signal_semaphores, NULL);
util_dynarray_init(&bs->wait_semaphores, NULL);
util_dynarray_init(&bs->tracked_semaphores, NULL);
util_dynarray_init(&bs->fd_wait_semaphores, NULL);
@ -589,7 +592,8 @@ typedef enum {
ZINK_SUBMIT_WAIT_ACQUIRE,
ZINK_SUBMIT_WAIT_FD,
ZINK_SUBMIT_CMDBUF,
ZINK_SUBMIT_SIGNAL,
ZINK_SUBMIT_SIGNAL_INTERNAL,
ZINK_SUBMIT_SIGNAL_USER,
ZINK_SUBMIT_MAX
} zink_submit;
@ -662,25 +666,34 @@ submit_queue(void *data, void *gdata, int thread_index)
/* then the signal submit with the timeline (fence) semaphore */
VkSemaphore signals[ZINK_MAX_SIGNALS];
si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount = !!bs->signal_semaphore;
si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount = !!bs->signal_semaphore;
signals[0] = bs->signal_semaphore;
si[ZINK_SUBMIT_SIGNAL].pSignalSemaphores = signals;
si[ZINK_SUBMIT_SIGNAL_INTERNAL].pSignalSemaphores = signals;
VkTimelineSemaphoreSubmitInfo tsi = {0};
uint64_t signal_values[ZINK_MAX_SIGNALS] = {0};
tsi.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
si[ZINK_SUBMIT_SIGNAL].pNext = &tsi;
si[ZINK_SUBMIT_SIGNAL_INTERNAL].pNext = &tsi;
tsi.pSignalSemaphoreValues = signal_values;
signal_values[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount] = batch_id;
signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = screen->sem;
tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount;
signal_values[si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount] = batch_id;
signals[si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount++] = screen->sem;
tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount;
if (bs->present)
signals[si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount++] = bs->present;
tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount;
signals[si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount++] = bs->present;
tsi.signalSemaphoreValueCount = si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount;
assert(si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount <= ZINK_MAX_SIGNALS);
assert(si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount <= ZINK_MAX_SIGNALS);
assert(tsi.signalSemaphoreValueCount <= ZINK_MAX_SIGNALS);
if (util_dynarray_num_elements(&bs->user_signal_semaphores, VkSemaphore)) {
si[ZINK_SUBMIT_SIGNAL_USER].signalSemaphoreCount = util_dynarray_num_elements(&bs->user_signal_semaphores, VkSemaphore);
si[ZINK_SUBMIT_SIGNAL_USER].pSignalSemaphores = bs->user_signal_semaphores.data;
} else {
num_si--;
if (!si[ZINK_SUBMIT_SIGNAL_INTERNAL].signalSemaphoreCount)
num_si--;
}
VkResult result;
if (bs->has_work) {
VRAM_ALLOC_LOOP(result,
@ -724,9 +737,6 @@ submit_queue(void *data, void *gdata, int thread_index)
);
}
if (!si[ZINK_SUBMIT_SIGNAL].signalSemaphoreCount)
num_si--;
simple_mtx_lock(&screen->queue_lock);
VRAM_ALLOC_LOOP(result,
VKSCR(QueueSubmit)(screen->queue, num_si, submit, VK_NULL_HANDLE),

View file

@ -236,11 +236,12 @@ zink_fence_server_signal(struct pipe_context *pctx, struct pipe_fence_handle *pf
{
struct zink_context *ctx = zink_context(pctx);
struct zink_tc_fence *mfence = (struct zink_tc_fence *)pfence;
assert(!ctx->bs->signal_semaphore);
ctx->bs->signal_semaphore = mfence->sem;
ctx->bs->has_work = true;
struct zink_batch_state *bs = ctx->bs;
util_dynarray_append(&ctx->bs->user_signal_semaphores, VkSemaphore, mfence->sem);
bs->has_work = true;
/* this must produce a synchronous flush that completes before the function returns */
pctx->flush(pctx, NULL, 0);
if (zink_screen(ctx->base.screen)->threaded_submit)

View file

@ -600,7 +600,8 @@ struct zink_batch_state {
VkCommandPool unsynchronized_cmdpool;
VkCommandBuffer unsynchronized_cmdbuf;
VkSemaphore signal_semaphore; //external signal semaphore
struct util_dynarray signal_semaphores; //external signal semaphores
struct util_dynarray signal_semaphores; //internal signal semaphores
struct util_dynarray user_signal_semaphores; //api signal semaphores
struct util_dynarray wait_semaphores; //external wait semaphores
struct util_dynarray wait_semaphore_stages; //external wait semaphores
struct util_dynarray fd_wait_semaphores; //dmabuf wait semaphores