From 85df907ad8692d1448ba12735c2981e47add34aa Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Thu, 11 Jan 2024 17:12:13 -0800 Subject: [PATCH] venus: allow tls ring submission to utilize the entire ring shmem Ring submissions on tls ring are synchronous and single threaded, thus without perf degradation, a single cmd can use the entire ring shmem. Signed-off-by: Yiwei Zhang Part-of: --- src/virtio/vulkan/vn_common.c | 5 ++++- src/virtio/vulkan/vn_instance.c | 15 ++++++++++----- src/virtio/vulkan/vn_ring.c | 13 +++++++++++-- src/virtio/vulkan/vn_ring.h | 3 ++- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c index a225dbea23f..8e0d215ae30 100644 --- a/src/virtio/vulkan/vn_common.c +++ b/src/virtio/vulkan/vn_common.c @@ -276,10 +276,13 @@ vn_tls_get_ring(struct vn_instance *instance) /* only need a small ring for synchronous cmds on tls ring */ static const size_t buf_size = 16 * 1024; + /* single cmd can use the entire ring shmem on tls ring */ + static const uint8_t direct_order = 0; + struct vn_ring_layout layout; vn_ring_get_layout(buf_size, extra_size, &layout); - tls_ring->ring = vn_ring_create(instance, &layout); + tls_ring->ring = vn_ring_create(instance, &layout, direct_order); if (!tls_ring->ring) { free(tls_ring); return NULL; diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c index 06627aeeb5b..157566a11a3 100644 --- a/src/virtio/vulkan/vn_instance.c +++ b/src/virtio/vulkan/vn_instance.c @@ -20,8 +20,6 @@ #include "vn_renderer.h" #include "vn_ring.h" -#define VN_INSTANCE_RING_SIZE (128 * 1024) - /* * Instance extensions add instance-level or physical-device-level * functionalities. It seems renderer support is either unnecessary or @@ -130,10 +128,17 @@ vn_instance_init_ring(struct vn_instance *instance) { /* 32-bit seqno for renderer roundtrips */ static const size_t extra_size = sizeof(uint32_t); - struct vn_ring_layout layout; - vn_ring_get_layout(VN_INSTANCE_RING_SIZE, extra_size, &layout); - instance->ring.ring = vn_ring_create(instance, &layout); + /* default instance ring size */ + static const size_t buf_size = 128 * 1024; + + /* order of 4 for performant async cmd enqueue */ + static const uint8_t direct_order = 4; + + struct vn_ring_layout layout; + vn_ring_get_layout(buf_size, extra_size, &layout); + + instance->ring.ring = vn_ring_create(instance, &layout, direct_order); if (!instance->ring.ring) return VK_ERROR_OUT_OF_HOST_MEMORY; diff --git a/src/virtio/vulkan/vn_ring.c b/src/virtio/vulkan/vn_ring.c index 085a24f1551..830ac009b53 100644 --- a/src/virtio/vulkan/vn_ring.c +++ b/src/virtio/vulkan/vn_ring.c @@ -40,6 +40,11 @@ struct vn_ring { */ mtx_t mutex; + /* size limit for cmd submission via ring shmem, derived from + * (buffer_size >> direct_order) upon vn_ring_create + */ + uint32_t direct_size; + /* used for indirect submission of large command (non-VkCommandBuffer) */ struct vn_cs_encoder upload; @@ -251,7 +256,8 @@ vn_ring_get_layout(size_t buf_size, struct vn_ring * vn_ring_create(struct vn_instance *instance, - const struct vn_ring_layout *layout) + const struct vn_ring_layout *layout, + uint8_t direct_order) { VN_TRACE_FUNC(); @@ -289,6 +295,9 @@ vn_ring_create(struct vn_instance *instance, mtx_init(&ring->mutex, mtx_plain); + ring->direct_size = layout->buffer_size >> direct_order; + assert(ring->direct_size); + vn_cs_encoder_init(&ring->upload, instance, VN_CS_ENCODER_STORAGE_SHMEM_ARRAY, 1 * 1024 * 1024); @@ -529,7 +538,7 @@ static inline bool vn_ring_submission_can_direct(const struct vn_ring *ring, const struct vn_cs_encoder *cs) { - return vn_cs_encoder_get_len(cs) <= (ring->buffer_size >> 4); + return vn_cs_encoder_get_len(cs) <= ring->direct_size; } static struct vn_cs_encoder * diff --git a/src/virtio/vulkan/vn_ring.h b/src/virtio/vulkan/vn_ring.h index b452e77ebbb..edecc70246d 100644 --- a/src/virtio/vulkan/vn_ring.h +++ b/src/virtio/vulkan/vn_ring.h @@ -46,7 +46,8 @@ vn_ring_get_layout(size_t buf_size, struct vn_ring * vn_ring_create(struct vn_instance *instance, - const struct vn_ring_layout *layout); + const struct vn_ring_layout *layout, + uint8_t direct_order); void vn_ring_destroy(struct vn_ring *ring);