venus: implement async present

Brief:
1. present info is deep-copied and passed to async present thread
2. normal queue present always waits for async present to take over
3. queue access is protected by async present queue lock
4. chain access is protected by chain locks
5. no perf gain in practice since we haven't allowed parallel queue
   submit or acquire image yet

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39283>
This commit was authored by Yiwei Zhang on 2026-01-02 20:16:25 -08:00 and committed by Marge Bot.
parent 84e2623300
commit 8392e82298
4 changed files with 134 additions and 2 deletions

View file

@ -22,7 +22,6 @@
#include <string.h>
#include <vulkan/vulkan.h>
#include "c11/threads.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/compiler.h"
@ -34,6 +33,7 @@
#include "util/simple_mtx.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
#include "util/u_thread.h"
#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_command_buffer.h"

View file

@ -27,6 +27,18 @@ vn_queue_fini(struct vn_queue *queue)
{
VkDevice dev_handle = vk_device_to_handle(queue->base.vk.base.device);
if (queue->async_present.initialized) {
mtx_lock(&queue->async_present.mutex);
queue->async_present.join = true;
cnd_signal(&queue->async_present.cond);
mtx_unlock(&queue->async_present.mutex);
thrd_join(queue->async_present.thread, NULL);
mtx_destroy(&queue->async_present.mutex);
cnd_destroy(&queue->async_present.cond);
}
if (queue->wait_fence != VK_NULL_HANDLE) {
vn_DestroyFence(dev_handle, queue->wait_fence, NULL);
}

View file

@ -36,6 +36,31 @@ struct vn_queue {
/* for vn_queue_submission storage */
struct vn_cached_storage storage;
/* for async queue present */
struct {
/* This mutex protects below:
* - state transitions: initialized, pending and join
* - VkQueue host access being externally synchronized
*/
mtx_t mutex;
/* Wake up async present thread upon presentation. */
cnd_t cond;
/* This is the async present thread. */
thrd_t thread;
/* Avoid extra locking on async present thread. */
pid_t tid;
/* Track whether the async present thread has been initialized. */
bool initialized;
/* Track whether the present is still pending acquired. */
bool pending;
/* Track whether to join the async present thread. */
bool join;
/* This is a deep copy of the requested presentation. */
VkPresentInfoKHR *info;
/* Track the result of the presentation. */
VkResult result;
} async_present;
};
VK_DEFINE_HANDLE_CASTS(vn_queue, base.vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

View file

@ -326,7 +326,19 @@ vn_wsi_sync_wait(struct vn_device *dev, int fd)
void
vn_wsi_flush(struct vn_queue *queue)
{
/* TODO */
/* No need to flush if there's no present. */
if (!queue->async_present.initialized)
return;
/* Should not flush on the async present thread. */
if (queue->async_present.tid == vn_gettid())
return;
/* Being able to acquire the lock ensures async present queue access
* has completed.
*/
mtx_lock(&queue->async_present.mutex);
mtx_unlock(&queue->async_present.mutex);
}
static VkPresentInfoKHR *
@ -551,6 +563,89 @@ vn_wsi_clone_present_info(struct vn_device *dev, const VkPresentInfoKHR *pi)
return _pi;
}
static int
vn_wsi_present_thread(void *data)
{
struct vn_queue *queue = data;
struct vk_queue *queue_vk = &queue->base.vk;
struct vn_device *dev = vn_device_from_vk(queue_vk->base.device);
const VkAllocationCallbacks *alloc = &dev->base.vk.alloc;
char thread_name[16];
snprintf(thread_name, ARRAY_SIZE(thread_name), "vn_wsi[%u,%u]",
queue_vk->queue_family_index, queue_vk->index_in_family);
u_thread_setname(thread_name);
queue->async_present.tid = vn_gettid();
queue->async_present.initialized = true;
mtx_lock(&queue->async_present.mutex);
while (true) {
while (!queue->async_present.info && !queue->async_present.join)
cnd_wait(&queue->async_present.cond, &queue->async_present.mutex);
if (queue->async_present.join)
break;
vn_wsi_chains_lock(dev, queue->async_present.info, /*all=*/true);
queue->async_present.pending = false;
queue->async_present.result =
wsi_common_queue_present(queue_vk->base.device->physical->wsi_device,
queue_vk, queue->async_present.info);
vn_wsi_chains_unlock(dev, queue->async_present.info, /*all=*/true);
vk_free(alloc, queue->async_present.info);
queue->async_present.info = NULL;
}
mtx_unlock(&queue->async_present.mutex);
return 0;
}
static VkResult
vn_wsi_present_async(struct vn_device *dev,
struct vn_queue *queue,
const VkPresentInfoKHR *pi)
{
VkResult result = VK_SUCCESS;
if (unlikely(!queue->async_present.initialized)) {
mtx_init(&queue->async_present.mutex, mtx_plain);
cnd_init(&queue->async_present.cond);
if (u_thread_create(&queue->async_present.thread, vn_wsi_present_thread,
queue) != thrd_success)
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
mtx_lock(&queue->async_present.mutex);
assert(!queue->async_present.info);
assert(!queue->async_present.pending);
result = queue->async_present.result;
if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) {
queue->async_present.info = vn_wsi_clone_present_info(dev, pi);
queue->async_present.pending = true;
cnd_signal(&queue->async_present.cond);
}
queue->async_present.result = VK_SUCCESS;
mtx_unlock(&queue->async_present.mutex);
/* Ensure async present thread has acquired the present lock. */
while (queue->async_present.pending)
thrd_yield();
if (pi->pResults) {
/* TODO: fill present result for the corresponding chain */
for (uint32_t i = 0; i < pi->swapchainCount; i++)
pi->pResults[i] = result;
}
return result;
}
/* swapchain commands */
VKAPI_ATTR VkResult VKAPI_CALL