From d17ddcc84773899ef8b022d94969ffc0909db0bc Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Tue, 5 Dec 2023 20:17:44 -0800 Subject: [PATCH] venus: dispatch background shader tasks to secondary ring Summary: - Add a perf option to force primary ring submission - Let device own secondary ring(s) for ad-hoc spawn - For threads where swapchain and command pool are created, track with TLS to instruct ring dispatch. - If the pipeline creation or cache retrieval happens on background threads that are not on the hot paths, force it to be synchronous and dispatch to the secondary ring after waiting for the primary ring to become current. - If the pipeline creation or cache retrieval happens on hot-path threads, dispatch to the primary ring to avoid being blocked by those tasks on the secondary ring. Signed-off-by: Yiwei Zhang Part-of: --- src/virtio/vulkan/vn_android.c | 2 + src/virtio/vulkan/vn_command_buffer.c | 2 + src/virtio/vulkan/vn_common.c | 38 ++++++++++++++++ src/virtio/vulkan/vn_common.h | 31 +++++++++++++ src/virtio/vulkan/vn_device.c | 46 +++++++++++++++++++ src/virtio/vulkan/vn_device.h | 7 +++ src/virtio/vulkan/vn_pipeline.c | 63 +++++++++++++++++++++------ src/virtio/vulkan/vn_ring.c | 9 ++++ src/virtio/vulkan/vn_ring.h | 3 ++ src/virtio/vulkan/vn_wsi.c | 2 + 10 files changed, 189 insertions(+), 14 deletions(-) diff --git a/src/virtio/vulkan/vn_android.c b/src/virtio/vulkan/vn_android.c index 25cba04a0a0..de7c4b61810 100644 --- a/src/virtio/vulkan/vn_android.c +++ b/src/virtio/vulkan/vn_android.c @@ -355,6 +355,8 @@ vn_GetSwapchainGrallocUsage2ANDROID( if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID) *grallocProducerUsage |= vn_android_gralloc_get_shared_present_usage(); + vn_tls_set_primary_ring_submission(); + return VK_SUCCESS; } diff --git a/src/virtio/vulkan/vn_command_buffer.c b/src/virtio/vulkan/vn_command_buffer.c index c446f4fabe9..8c275335ca1 100644 --- a/src/virtio/vulkan/vn_command_buffer.c +++ b/src/virtio/vulkan/vn_command_buffer.c 
@@ -687,6 +687,8 @@ vn_CreateCommandPool(VkDevice device, vn_async_vkCreateCommandPool(dev->primary_ring, device, pCreateInfo, NULL, &pool_handle); + vn_tls_set_primary_ring_submission(); + *pCommandPool = pool_handle; return VK_SUCCESS; diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c index 00962790443..2b3a858202a 100644 --- a/src/virtio/vulkan/vn_common.c +++ b/src/virtio/vulkan/vn_common.c @@ -51,6 +51,7 @@ static const struct debug_control vn_perf_options[] = { { "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK }, { "no_async_mem_alloc", VN_PERF_NO_ASYNC_MEM_ALLOC }, { "no_tiled_wsi_image", VN_PERF_NO_TILED_WSI_IMAGE }, + { "no_multi_ring", VN_PERF_NO_MULTI_RING }, { NULL, 0 }, /* clang-format on */ }; @@ -238,3 +239,40 @@ vn_relax(struct vn_relax_state *state) const uint32_t shift = util_last_bit(*iter) - busy_wait_order - 1; os_time_sleep(base_sleep_us << shift); } + +static void +vn_tls_free(void *tls) +{ + free(tls); +} + +static tss_t vn_tls_key; +static bool vn_tls_key_valid; + +static void +vn_tls_key_create_once(void) +{ + vn_tls_key_valid = tss_create(&vn_tls_key, vn_tls_free) == thrd_success; + if (!vn_tls_key_valid && VN_DEBUG(INIT)) + vn_log(NULL, "WARNING: failed to create vn_tls_key"); +} + +struct vn_tls * +vn_tls_get(void) +{ + static once_flag once = ONCE_FLAG_INIT; + call_once(&once, vn_tls_key_create_once); + if (unlikely(!vn_tls_key_valid)) + return NULL; + + struct vn_tls *tls = tss_get(vn_tls_key); + if (likely(tls)) + return tls; + + tls = calloc(1, sizeof(*tls)); + if (tls && tss_set(vn_tls_key, tls) == thrd_success) + return tls; + + free(tls); + return NULL; +} diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h index 6b65cf547d6..8c92be469b8 100644 --- a/src/virtio/vulkan/vn_common.h +++ b/src/virtio/vulkan/vn_common.h @@ -124,6 +124,7 @@ enum vn_perf { VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8, VN_PERF_NO_ASYNC_MEM_ALLOC = 1ull << 9, VN_PERF_NO_TILED_WSI_IMAGE = 1ull << 10, + 
VN_PERF_NO_MULTI_RING = 1ull << 11, }; typedef uint64_t vn_object_id; @@ -208,6 +209,16 @@ struct vn_relax_state { const char *reason; }; +struct vn_tls { + /* Track swapchain and command pool creations on threads so dispatch of the + * following on non-tracked threads can be routed as synchronous on the + * secondary ring: + * - pipeline creations + * - pipeline cache retrievals + */ + bool primary_ring_submission; +}; + void vn_env_init(void); @@ -469,4 +480,24 @@ vn_gettid(void) #endif } +struct vn_tls * +vn_tls_get(void); + +static inline void +vn_tls_set_primary_ring_submission(void) +{ + struct vn_tls *tls = vn_tls_get(); + if (likely(tls)) + tls->primary_ring_submission = true; +} + +static inline bool +vn_tls_get_primary_ring_submission(void) +{ + const struct vn_tls *tls = vn_tls_get(); + if (likely(tls)) + return tls->primary_ring_submission; + return true; +} + #endif /* VN_COMMON_H */ diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c index 7f124972763..d6d131bbf83 100644 --- a/src/virtio/vulkan/vn_device.c +++ b/src/virtio/vulkan/vn_device.c @@ -436,6 +436,41 @@ vn_device_update_shader_cache_id(struct vn_device *dev) #endif } +bool +vn_device_secondary_ring_init_once(struct vn_device *dev) +{ + VN_TRACE_FUNC(); + + assert(!dev->force_primary_ring_submission); + + static bool ok = true; + if (!ok) + return ok; + + mtx_lock(&dev->ring_mutex); + /* allows caller to check secondary ring without holding a lock */ + if (dev->secondary_ring) + goto out_unlock; + + /* keep the extra for potential roundtrip sync on secondary ring */ + static const size_t extra_size = sizeof(uint32_t); + + /* only need a small ring for synchronous cmds on secondary ring */ + static const size_t buf_size = 16 * 1024; + + struct vn_ring_layout layout; + vn_ring_get_layout(buf_size, extra_size, &layout); + + dev->secondary_ring = vn_ring_create(dev->instance, &layout); + if (!dev->secondary_ring) { + ok = false; + vn_log(dev->instance, "WARNING: failed to 
create secondary ring"); + } +out_unlock: + mtx_unlock(&dev->ring_mutex); + return ok; +} + static VkResult vn_device_init(struct vn_device *dev, struct vn_physical_device *physical_dev, @@ -454,6 +489,9 @@ vn_device_init(struct vn_device *dev, dev->renderer = instance->renderer; dev->primary_ring = instance->ring.ring; + /* can be extended for app compat purpose */ + dev->force_primary_ring_submission = VN_PERF(NO_MULTI_RING); + create_info = vn_device_fix_create_info(dev, create_info, alloc, &local_create_info); if (!create_info) @@ -469,6 +507,8 @@ vn_device_init(struct vn_device *dev, if (result != VK_SUCCESS) return result; + mtx_init(&dev->ring_mutex, mtx_plain); + result = vn_device_memory_report_init(dev, create_info); if (result != VK_SUCCESS) goto out_destroy_device; @@ -520,6 +560,7 @@ out_memory_report_fini: vn_device_memory_report_fini(dev); out_destroy_device: + mtx_destroy(&dev->ring_mutex); vn_call_vkDestroyDevice(dev->primary_ring, dev_handle, NULL); return result; @@ -617,6 +658,11 @@ vn_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) } } + if (dev->secondary_ring) + vn_ring_destroy(dev->secondary_ring); + + mtx_destroy(&dev->ring_mutex); + vk_free(alloc, dev->queues); vn_device_base_fini(&dev->base); diff --git a/src/virtio/vulkan/vn_device.h b/src/virtio/vulkan/vn_device.h index 8187b3eb46f..65fdd62aa1d 100644 --- a/src/virtio/vulkan/vn_device.h +++ b/src/virtio/vulkan/vn_device.h @@ -29,6 +29,10 @@ struct vn_device { struct vn_physical_device *physical_device; struct vn_renderer *renderer; struct vn_ring *primary_ring; + bool force_primary_ring_submission; + + mtx_t ring_mutex; + struct vn_ring *secondary_ring; struct vn_device_memory_report *memory_reports; uint32_t memory_report_count; @@ -80,4 +84,7 @@ vn_device_emit_device_memory_report(struct vn_device *dev, dev->memory_reports[i].callback(&report, dev->memory_reports[i].data); } +bool +vn_device_secondary_ring_init_once(struct vn_device *dev); + #endif /* 
VN_DEVICE_H */ diff --git a/src/virtio/vulkan/vn_pipeline.c b/src/virtio/vulkan/vn_pipeline.c index 50da47debd1..11e7d323f58 100644 --- a/src/virtio/vulkan/vn_pipeline.c +++ b/src/virtio/vulkan/vn_pipeline.c @@ -417,6 +417,34 @@ vn_DestroyPipelineCache(VkDevice device, vk_free(alloc, cache); } +static struct vn_ring * +vn_get_target_ring(struct vn_device *dev) +{ + if (dev->force_primary_ring_submission) + return dev->primary_ring; + + if (vn_tls_get_primary_ring_submission()) + return dev->primary_ring; + + if (!dev->secondary_ring) { + if (!vn_device_secondary_ring_init_once(dev)) { + /* fallback to primary ring submission */ + return dev->primary_ring; + } + } + + /* Ensure pipeline cache and pipeline deps are ready in the renderer. + * + * TODO: + * - For cache retrieval, track ring seqno of cache obj and only wait + * for that seqno once. + * - For pipeline creation, track ring seqnos of pipeline layout and + * renderpass objs it depends on, and only wait for those seqnos once. + */ + vn_ring_wait_all(dev->primary_ring); + return dev->secondary_ring; +} + VkResult vn_GetPipelineCacheData(VkDevice device, VkPipelineCache pipelineCache, @@ -427,10 +455,13 @@ vn_GetPipelineCacheData(VkDevice device, struct vn_device *dev = vn_device_from_handle(device); struct vn_physical_device *physical_dev = dev->physical_device; + struct vn_ring *target_ring = vn_get_target_ring(dev); + assert(target_ring); + struct vk_pipeline_cache_header *header = pData; VkResult result; if (!pData) { - result = vn_call_vkGetPipelineCacheData(dev->primary_ring, device, + result = vn_call_vkGetPipelineCacheData(target_ring, device, pipelineCache, pDataSize, NULL); if (result != VK_SUCCESS) return vn_error(dev->instance, result); @@ -454,7 +485,7 @@ vn_GetPipelineCacheData(VkDevice device, *pDataSize -= header->header_size; result = - vn_call_vkGetPipelineCacheData(dev->primary_ring, device, pipelineCache, + vn_call_vkGetPipelineCacheData(target_ring, device, pipelineCache, pDataSize, pData 
+ header->header_size); if (result < VK_SUCCESS) return vn_error(dev->instance, result); @@ -1404,16 +1435,18 @@ vn_CreateGraphicsPipelines(VkDevice device, (const VkBaseInStructure *)pCreateInfos[i].pNext); } - if (want_sync) { + struct vn_ring *target_ring = vn_get_target_ring(dev); + assert(target_ring); + if (want_sync || target_ring == dev->secondary_ring) { result = vn_call_vkCreateGraphicsPipelines( - dev->primary_ring, device, pipelineCache, createInfoCount, - pCreateInfos, NULL, pPipelines); + target_ring, device, pipelineCache, createInfoCount, pCreateInfos, + NULL, pPipelines); if (result != VK_SUCCESS) vn_destroy_failed_pipelines(dev, createInfoCount, pPipelines, alloc); } else { - vn_async_vkCreateGraphicsPipelines(dev->primary_ring, device, - pipelineCache, createInfoCount, - pCreateInfos, NULL, pPipelines); + vn_async_vkCreateGraphicsPipelines(target_ring, device, pipelineCache, + createInfoCount, pCreateInfos, NULL, + pPipelines); result = VK_SUCCESS; } @@ -1458,16 +1491,18 @@ vn_CreateComputePipelines(VkDevice device, (const VkBaseInStructure *)pCreateInfos[i].pNext); } - if (want_sync) { + struct vn_ring *target_ring = vn_get_target_ring(dev); + assert(target_ring); + if (want_sync || target_ring == dev->secondary_ring) { result = vn_call_vkCreateComputePipelines( - dev->primary_ring, device, pipelineCache, createInfoCount, - pCreateInfos, NULL, pPipelines); + target_ring, device, pipelineCache, createInfoCount, pCreateInfos, + NULL, pPipelines); if (result != VK_SUCCESS) vn_destroy_failed_pipelines(dev, createInfoCount, pPipelines, alloc); } else { - vn_async_vkCreateComputePipelines(dev->primary_ring, device, - pipelineCache, createInfoCount, - pCreateInfos, NULL, pPipelines); + vn_async_vkCreateComputePipelines(target_ring, device, pipelineCache, + createInfoCount, pCreateInfos, NULL, + pPipelines); result = VK_SUCCESS; } diff --git a/src/virtio/vulkan/vn_ring.c b/src/virtio/vulkan/vn_ring.c index 8dbd92beeda..f44d4ea6211 100644 --- 
a/src/virtio/vulkan/vn_ring.c +++ b/src/virtio/vulkan/vn_ring.c @@ -172,6 +172,15 @@ vn_ring_wait_seqno(struct vn_ring *ring, uint32_t seqno) } while (true); } +void +vn_ring_wait_all(struct vn_ring *ring) +{ + /* load from tail rather than ring->cur for atomicity */ + const uint32_t pending_seqno = + atomic_load_explicit(ring->shared.tail, memory_order_relaxed); + vn_ring_wait_seqno(ring, pending_seqno); +} + static bool vn_ring_has_space(const struct vn_ring *ring, uint32_t size, diff --git a/src/virtio/vulkan/vn_ring.h b/src/virtio/vulkan/vn_ring.h index ac8acb7b7fd..a3961cdc965 100644 --- a/src/virtio/vulkan/vn_ring.h +++ b/src/virtio/vulkan/vn_ring.h @@ -63,6 +63,9 @@ vn_ring_unset_status_bits(struct vn_ring *ring, uint32_t mask); bool vn_ring_get_seqno_status(struct vn_ring *ring, uint32_t seqno); +void +vn_ring_wait_all(struct vn_ring *ring); + struct vn_ring_submit_command { /* empty command implies errors */ struct vn_cs_encoder command; diff --git a/src/virtio/vulkan/vn_wsi.c b/src/virtio/vulkan/vn_wsi.c index b070226d938..f46627984bf 100644 --- a/src/virtio/vulkan/vn_wsi.c +++ b/src/virtio/vulkan/vn_wsi.c @@ -270,6 +270,8 @@ vn_CreateSwapchainKHR(VkDevice device, VN_WSI_PTR(pCreateInfo->oldSwapchain)); } + vn_tls_set_primary_ring_submission(); + return vn_result(dev->instance, result); }