From 68d31c28d4115ff06dc4e8a272f1a76130ced2a7 Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Tue, 18 Nov 2025 19:20:42 -0800 Subject: [PATCH] venus: fix racy semaphore feedback counter update Previously, we update the sfb dst slot upon vn_SignalSemaphore so that vn_GetSemaphoreCounterValue can poll just the feedback slot itself. However, that can race with pending sfb cmds that are going to update the slot value, ending up with stuck sync progression. This change fixes it by disallowing vn_SignalSemaphore to touch the sfb dst slot. To ensure counter query being monotonic, vn_GetSemaphoreCounterValue now takes the greater of signaled counter and the sfb counter read. Test with dEQP-VK.synchronization* group: - w/o this: stuck shows up within 2 min with 8 parallel deqp runs - with this: no stuck for multiple full runs of the same Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14304 Fixes: 5c7e60362cb ("venus: enable timeline semaphore feedback") (cherry picked from commit 829bd406c04566962268138195ecb2c4d78da5cf) Part-of: --- src/virtio/vulkan/vn_queue.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c index b231fbfa457..9aa9524dc12 100644 --- a/src/virtio/vulkan/vn_queue.c +++ b/src/virtio/vulkan/vn_queue.c @@ -2158,7 +2158,7 @@ vn_GetSemaphoreCounterValue(VkDevice device, if (sem->feedback.slot) { simple_mtx_lock(&sem->feedback.async_wait_mtx); - const uint64_t counter = vn_feedback_get_counter(sem->feedback.slot); + uint64_t counter = vn_feedback_get_counter(sem->feedback.slot); if (sem->feedback.signaled_counter < counter) { /* When the timeline semaphore feedback slot gets signaled, the real * semaphore signal operation follows after but the signaling isr can @@ -2206,6 +2206,11 @@ vn_GetSemaphoreCounterValue(VkDevice device, sem->feedback.signaled_counter = counter; } + + /* vn_SignalSemaphore writes the sfb signaled_counter without updating + * the slot. So the semaphore counter query here must consider both. + */ + counter = MAX2(counter, sem->feedback.signaled_counter); simple_mtx_unlock(&sem->feedback.async_wait_mtx); *pValue = counter; @@ -2227,9 +2232,13 @@ vn_SignalSemaphore(VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo) vn_async_vkSignalSemaphore(dev->primary_ring, device, pSignalInfo); if (sem->feedback.slot) { + /* Must not update the sfb dst slot here because there's no followed + * submission to flush the cache (implicit sync guarantee) before the + * pending sfb cmd to update the slot. Otherwise, the slot update can be + * written by the racy update here. + */ simple_mtx_lock(&sem->feedback.async_wait_mtx); - vn_feedback_set_counter(sem->feedback.slot, pSignalInfo->value); /* Update async counters. Since we're signaling, we're aligned with * the renderer. */