diff --git a/src/virtio/vulkan/vn_android.c b/src/virtio/vulkan/vn_android.c
index 25cba04a0a0..de7c4b61810 100644
--- a/src/virtio/vulkan/vn_android.c
+++ b/src/virtio/vulkan/vn_android.c
@@ -355,6 +355,8 @@ vn_GetSwapchainGrallocUsage2ANDROID(
    if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID)
       *grallocProducerUsage |= vn_android_gralloc_get_shared_present_usage();
 
+   vn_tls_set_primary_ring_submission();
+
    return VK_SUCCESS;
 }
 
diff --git a/src/virtio/vulkan/vn_command_buffer.c b/src/virtio/vulkan/vn_command_buffer.c
index c446f4fabe9..8c275335ca1 100644
--- a/src/virtio/vulkan/vn_command_buffer.c
+++ b/src/virtio/vulkan/vn_command_buffer.c
@@ -687,6 +687,8 @@ vn_CreateCommandPool(VkDevice device,
    vn_async_vkCreateCommandPool(dev->primary_ring, device, pCreateInfo, NULL,
                                 &pool_handle);
 
+   vn_tls_set_primary_ring_submission();
+
    *pCommandPool = pool_handle;
 
    return VK_SUCCESS;
diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c
index 00962790443..2b3a858202a 100644
--- a/src/virtio/vulkan/vn_common.c
+++ b/src/virtio/vulkan/vn_common.c
@@ -51,6 +51,7 @@ static const struct debug_control vn_perf_options[] = {
    { "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK },
    { "no_async_mem_alloc", VN_PERF_NO_ASYNC_MEM_ALLOC },
    { "no_tiled_wsi_image", VN_PERF_NO_TILED_WSI_IMAGE },
+   { "no_multi_ring", VN_PERF_NO_MULTI_RING },
    { NULL, 0 },
    /* clang-format on */
 };
@@ -238,3 +239,40 @@ vn_relax(struct vn_relax_state *state)
    const uint32_t shift = util_last_bit(*iter) - busy_wait_order - 1;
    os_time_sleep(base_sleep_us << shift);
 }
+
+static void
+vn_tls_free(void *tls)
+{
+   free(tls);
+}
+
+static tss_t vn_tls_key;
+static bool vn_tls_key_valid;
+
+static void
+vn_tls_key_create_once(void)
+{
+   vn_tls_key_valid = tss_create(&vn_tls_key, vn_tls_free) == thrd_success;
+   if (!vn_tls_key_valid && VN_DEBUG(INIT))
+      vn_log(NULL, "WARNING: failed to create vn_tls_key");
+}
+
+struct vn_tls *
+vn_tls_get(void)
+{
+   static once_flag once = ONCE_FLAG_INIT;
+   call_once(&once, vn_tls_key_create_once);
+   if (unlikely(!vn_tls_key_valid))
+      return NULL;
+
+   struct vn_tls *tls = tss_get(vn_tls_key);
+   if (likely(tls))
+      return tls;
+
+   tls = calloc(1, sizeof(*tls));
+   if (tls && tss_set(vn_tls_key, tls) == thrd_success)
+      return tls;
+
+   free(tls);
+   return NULL;
+}
diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h
index 6b65cf547d6..8c92be469b8 100644
--- a/src/virtio/vulkan/vn_common.h
+++ b/src/virtio/vulkan/vn_common.h
@@ -124,6 +124,7 @@ enum vn_perf {
    VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8,
    VN_PERF_NO_ASYNC_MEM_ALLOC = 1ull << 9,
    VN_PERF_NO_TILED_WSI_IMAGE = 1ull << 10,
+   VN_PERF_NO_MULTI_RING = 1ull << 11,
 };
 
 typedef uint64_t vn_object_id;
@@ -208,6 +209,16 @@ struct vn_relax_state {
    const char *reason;
 };
 
+struct vn_tls {
+   /* Track swapchain and command pool creations on threads so dispatch of the
+    * following on non-tracked threads can be routed as synchronous on the
+    * secondary ring:
+    * - pipeline creations
+    * - pipeline cache retrievals
+    */
+   bool primary_ring_submission;
+};
+
 void
 vn_env_init(void);
 
@@ -469,4 +480,24 @@ vn_gettid(void)
 #endif
 }
 
+struct vn_tls *
+vn_tls_get(void);
+
+static inline void
+vn_tls_set_primary_ring_submission(void)
+{
+   struct vn_tls *tls = vn_tls_get();
+   if (likely(tls))
+      tls->primary_ring_submission = true;
+}
+
+static inline bool
+vn_tls_get_primary_ring_submission(void)
+{
+   const struct vn_tls *tls = vn_tls_get();
+   if (likely(tls))
+      return tls->primary_ring_submission;
+   return true;
+}
+
 #endif /* VN_COMMON_H */
diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c
index 7f124972763..d6d131bbf83 100644
--- a/src/virtio/vulkan/vn_device.c
+++ b/src/virtio/vulkan/vn_device.c
@@ -436,6 +436,41 @@ vn_device_update_shader_cache_id(struct vn_device *dev)
 #endif
 }
 
+bool
+vn_device_secondary_ring_init_once(struct vn_device *dev)
+{
+   VN_TRACE_FUNC();
+
+   assert(!dev->force_primary_ring_submission);
+
+   static bool ok = true;
+   if (!ok)
+      return ok;
+
+   mtx_lock(&dev->ring_mutex);
+   /* allows caller to check secondary ring without holding a lock */
+   if (dev->secondary_ring)
+      goto out_unlock;
+
+   /* keep the extra for potential roundtrip sync on secondary ring */
+   static const size_t extra_size = sizeof(uint32_t);
+
+   /* only need a small ring for synchronous cmds on secondary ring */
+   static const size_t buf_size = 16 * 1024;
+
+   struct vn_ring_layout layout;
+   vn_ring_get_layout(buf_size, extra_size, &layout);
+
+   dev->secondary_ring = vn_ring_create(dev->instance, &layout);
+   if (!dev->secondary_ring) {
+      ok = false;
+      vn_log(dev->instance, "WARNING: failed to create secondary ring");
+   }
+out_unlock:
+   mtx_unlock(&dev->ring_mutex);
+   return ok;
+}
+
 static VkResult
 vn_device_init(struct vn_device *dev,
                struct vn_physical_device *physical_dev,
@@ -454,6 +489,9 @@ vn_device_init(struct vn_device *dev,
    dev->renderer = instance->renderer;
    dev->primary_ring = instance->ring.ring;
 
+   /* can be extended for app compat purpose */
+   dev->force_primary_ring_submission = VN_PERF(NO_MULTI_RING);
+
    create_info =
       vn_device_fix_create_info(dev, create_info, alloc, &local_create_info);
    if (!create_info)
@@ -469,6 +507,8 @@ vn_device_init(struct vn_device *dev,
    if (result != VK_SUCCESS)
       return result;
 
+   mtx_init(&dev->ring_mutex, mtx_plain);
+
    result = vn_device_memory_report_init(dev, create_info);
    if (result != VK_SUCCESS)
       goto out_destroy_device;
@@ -520,6 +560,7 @@ out_memory_report_fini:
    vn_device_memory_report_fini(dev);
 
 out_destroy_device:
+   mtx_destroy(&dev->ring_mutex);
    vn_call_vkDestroyDevice(dev->primary_ring, dev_handle, NULL);
 
    return result;
@@ -617,6 +658,11 @@ vn_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator)
       }
    }
 
+   if (dev->secondary_ring)
+      vn_ring_destroy(dev->secondary_ring);
+
+   mtx_destroy(&dev->ring_mutex);
+
    vk_free(alloc, dev->queues);
 
    vn_device_base_fini(&dev->base);
diff --git a/src/virtio/vulkan/vn_device.h b/src/virtio/vulkan/vn_device.h
index 8187b3eb46f..65fdd62aa1d 100644
--- a/src/virtio/vulkan/vn_device.h
+++ b/src/virtio/vulkan/vn_device.h
@@ -29,6 +29,10 @@ struct vn_device {
    struct vn_physical_device *physical_device;
    struct vn_renderer *renderer;
    struct vn_ring *primary_ring;
+   bool force_primary_ring_submission;
+
+   mtx_t ring_mutex;
+   struct vn_ring *secondary_ring;
 
    struct vn_device_memory_report *memory_reports;
    uint32_t memory_report_count;
@@ -80,4 +84,7 @@ vn_device_emit_device_memory_report(struct vn_device *dev,
       dev->memory_reports[i].callback(&report, dev->memory_reports[i].data);
 }
 
+bool
+vn_device_secondary_ring_init_once(struct vn_device *dev);
+
 #endif /* VN_DEVICE_H */
diff --git a/src/virtio/vulkan/vn_pipeline.c b/src/virtio/vulkan/vn_pipeline.c
index 50da47debd1..11e7d323f58 100644
--- a/src/virtio/vulkan/vn_pipeline.c
+++ b/src/virtio/vulkan/vn_pipeline.c
@@ -417,6 +417,34 @@ vn_DestroyPipelineCache(VkDevice device,
    vk_free(alloc, cache);
 }
 
+static struct vn_ring *
+vn_get_target_ring(struct vn_device *dev)
+{
+   if (dev->force_primary_ring_submission)
+      return dev->primary_ring;
+
+   if (vn_tls_get_primary_ring_submission())
+      return dev->primary_ring;
+
+   if (!dev->secondary_ring) {
+      if (!vn_device_secondary_ring_init_once(dev)) {
+         /* fallback to primary ring submission */
+         return dev->primary_ring;
+      }
+   }
+
+   /* Ensure pipeline cache and pipeline deps are ready in the renderer.
+    *
+    * TODO:
+    * - For cache retrieval, track ring seqno of cache obj and only wait
+    *   for that seqno once.
+    * - For pipeline creation, track ring seqnos of pipeline layout and
+    *   renderpass objs it depends on, and only wait for those seqnos once.
+    */
+   vn_ring_wait_all(dev->primary_ring);
+   return dev->secondary_ring;
+}
+
 VkResult
 vn_GetPipelineCacheData(VkDevice device,
                         VkPipelineCache pipelineCache,
@@ -427,10 +455,13 @@ vn_GetPipelineCacheData(VkDevice device,
    struct vn_device *dev = vn_device_from_handle(device);
    struct vn_physical_device *physical_dev = dev->physical_device;
 
+   struct vn_ring *target_ring = vn_get_target_ring(dev);
+   assert(target_ring);
+
    struct vk_pipeline_cache_header *header = pData;
    VkResult result;
    if (!pData) {
-      result = vn_call_vkGetPipelineCacheData(dev->primary_ring, device,
+      result = vn_call_vkGetPipelineCacheData(target_ring, device,
                                               pipelineCache, pDataSize, NULL);
       if (result != VK_SUCCESS)
          return vn_error(dev->instance, result);
@@ -454,7 +485,7 @@ vn_GetPipelineCacheData(VkDevice device,
    *pDataSize -= header->header_size;
 
    result =
-      vn_call_vkGetPipelineCacheData(dev->primary_ring, device, pipelineCache,
+      vn_call_vkGetPipelineCacheData(target_ring, device, pipelineCache,
                                      pDataSize, pData + header->header_size);
    if (result < VK_SUCCESS)
       return vn_error(dev->instance, result);
@@ -1404,16 +1435,18 @@ vn_CreateGraphicsPipelines(VkDevice device,
          (const VkBaseInStructure *)pCreateInfos[i].pNext);
    }
 
-   if (want_sync) {
+   struct vn_ring *target_ring = vn_get_target_ring(dev);
+   assert(target_ring);
+   if (want_sync || target_ring == dev->secondary_ring) {
       result = vn_call_vkCreateGraphicsPipelines(
-         dev->primary_ring, device, pipelineCache, createInfoCount,
-         pCreateInfos, NULL, pPipelines);
+         target_ring, device, pipelineCache, createInfoCount, pCreateInfos,
+         NULL, pPipelines);
       if (result != VK_SUCCESS)
         vn_destroy_failed_pipelines(dev, createInfoCount, pPipelines, alloc);
    } else {
-      vn_async_vkCreateGraphicsPipelines(dev->primary_ring, device,
-                                         pipelineCache, createInfoCount,
-                                         pCreateInfos, NULL, pPipelines);
+      vn_async_vkCreateGraphicsPipelines(target_ring, device, pipelineCache,
+                                         createInfoCount, pCreateInfos, NULL,
+                                         pPipelines);
       result = VK_SUCCESS;
    }
 
@@ -1458,16 +1491,18 @@ vn_CreateComputePipelines(VkDevice device,
          (const VkBaseInStructure *)pCreateInfos[i].pNext);
    }
 
-   if (want_sync) {
+   struct vn_ring *target_ring = vn_get_target_ring(dev);
+   assert(target_ring);
+   if (want_sync || target_ring == dev->secondary_ring) {
      result = vn_call_vkCreateComputePipelines(
-         dev->primary_ring, device, pipelineCache, createInfoCount,
-         pCreateInfos, NULL, pPipelines);
+         target_ring, device, pipelineCache, createInfoCount, pCreateInfos,
+         NULL, pPipelines);
      if (result != VK_SUCCESS)
        vn_destroy_failed_pipelines(dev, createInfoCount, pPipelines, alloc);
    } else {
-      vn_async_vkCreateComputePipelines(dev->primary_ring, device,
-                                        pipelineCache, createInfoCount,
-                                        pCreateInfos, NULL, pPipelines);
+      vn_async_vkCreateComputePipelines(target_ring, device, pipelineCache,
+                                        createInfoCount, pCreateInfos, NULL,
+                                        pPipelines);
      result = VK_SUCCESS;
    }
 
diff --git a/src/virtio/vulkan/vn_ring.c b/src/virtio/vulkan/vn_ring.c
index 8dbd92beeda..f44d4ea6211 100644
--- a/src/virtio/vulkan/vn_ring.c
+++ b/src/virtio/vulkan/vn_ring.c
@@ -172,6 +172,15 @@ vn_ring_wait_seqno(struct vn_ring *ring, uint32_t seqno)
    } while (true);
 }
 
+void
+vn_ring_wait_all(struct vn_ring *ring)
+{
+   /* load from tail rather than ring->cur for atomicity */
+   const uint32_t pending_seqno =
+      atomic_load_explicit(ring->shared.tail, memory_order_relaxed);
+   vn_ring_wait_seqno(ring, pending_seqno);
+}
+
 static bool
 vn_ring_has_space(const struct vn_ring *ring,
                   uint32_t size,
diff --git a/src/virtio/vulkan/vn_ring.h b/src/virtio/vulkan/vn_ring.h
index ac8acb7b7fd..a3961cdc965 100644
--- a/src/virtio/vulkan/vn_ring.h
+++ b/src/virtio/vulkan/vn_ring.h
@@ -63,6 +63,9 @@ vn_ring_unset_status_bits(struct vn_ring *ring, uint32_t mask);
 bool
 vn_ring_get_seqno_status(struct vn_ring *ring, uint32_t seqno);
 
+void
+vn_ring_wait_all(struct vn_ring *ring);
+
 struct vn_ring_submit_command {
    /* empty command implies errors */
    struct vn_cs_encoder command;
diff --git a/src/virtio/vulkan/vn_wsi.c b/src/virtio/vulkan/vn_wsi.c
index b070226d938..f46627984bf 100644
--- a/src/virtio/vulkan/vn_wsi.c
+++ b/src/virtio/vulkan/vn_wsi.c
@@ -270,6 +270,8 @@ vn_CreateSwapchainKHR(VkDevice device,
             VN_WSI_PTR(pCreateInfo->oldSwapchain));
    }
 
+   vn_tls_set_primary_ring_submission();
+
    return vn_result(dev->instance, result);
 }
 
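
For reference, a minimal standalone sketch of the C11 threads.h pattern that vn_tls_get() builds on: a process-wide tss key created exactly once via call_once(), and a per-thread struct that is lazily calloc'ed on first access and freed by the key's destructor when the thread exits. The demo_* names below are placeholders for illustration, not driver symbols.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <threads.h>

struct demo_tls {
   bool primary_ring_submission;
};

static tss_t demo_tls_key;
static bool demo_tls_key_valid;

static void
demo_tls_free(void *tls)
{
   /* runs automatically for each thread's value when that thread exits */
   free(tls);
}

static void
demo_tls_key_create_once(void)
{
   demo_tls_key_valid =
      tss_create(&demo_tls_key, demo_tls_free) == thrd_success;
}

static struct demo_tls *
demo_tls_get(void)
{
   static once_flag once = ONCE_FLAG_INIT;
   call_once(&once, demo_tls_key_create_once);
   if (!demo_tls_key_valid)
      return NULL;

   struct demo_tls *tls = tss_get(demo_tls_key);
   if (tls)
      return tls;

   /* first access on this thread: allocate zeroed per-thread state */
   tls = calloc(1, sizeof(*tls));
   if (tls && tss_set(demo_tls_key, tls) == thrd_success)
      return tls;

   free(tls);
   return NULL;
}

static int
worker(void *arg)
{
   (void)arg;
   struct demo_tls *tls = demo_tls_get();
   /* a fresh thread starts out untracked */
   printf("worker tracked: %d\n", tls ? (int)tls->primary_ring_submission : -1);
   return 0;
}

int
main(void)
{
   struct demo_tls *tls = demo_tls_get();
   if (tls)
      tls->primary_ring_submission = true; /* e.g. after creating a cmd pool */

   thrd_t t;
   thrd_create(&t, worker, NULL);
   thrd_join(t, NULL);

   printf("main tracked: %d\n", tls ? (int)tls->primary_ring_submission : -1);
   return 0;
}

Because the per-thread struct is zero-initialized, a thread that never creates a swapchain or command pool stays untracked by default. Note also that vn_tls_get_primary_ring_submission() returns true when TLS is unavailable, so a TLS failure degrades to the existing primary-ring path rather than to the secondary ring.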
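
The routing itself is a small decision tree: tracked threads (and the no_multi_ring override) keep asynchronous submission on the primary ring, while untracked threads lazily create the per-device secondary ring under dev->ring_mutex and fall back to the primary ring if that creation ever fails. Below is a standalone sketch of that init-once-with-failure-latch shape only, with demo_* stand-ins for the vn_device/vn_ring types; it is not the driver code.

#include <stdbool.h>
#include <stdlib.h>
#include <threads.h>

struct demo_ring {
   int id;
};

struct demo_device {
   struct demo_ring *primary_ring;
   bool force_primary_ring_submission;
   mtx_t ring_mutex;
   struct demo_ring *secondary_ring;
};

static struct demo_ring *
demo_ring_create(void)
{
   return calloc(1, sizeof(struct demo_ring));
}

static bool
demo_secondary_ring_init_once(struct demo_device *dev)
{
   /* latch a failure so later calls do not retry the creation */
   static bool ok = true;
   if (!ok)
      return false;

   mtx_lock(&dev->ring_mutex);
   /* another thread may have created the ring while we waited on the lock */
   if (!dev->secondary_ring) {
      dev->secondary_ring = demo_ring_create();
      if (!dev->secondary_ring)
         ok = false;
   }
   mtx_unlock(&dev->ring_mutex);
   return ok;
}

static struct demo_ring *
demo_get_target_ring(struct demo_device *dev, bool thread_is_tracked)
{
   if (dev->force_primary_ring_submission || thread_is_tracked)
      return dev->primary_ring;

   if (!dev->secondary_ring && !demo_secondary_ring_init_once(dev))
      return dev->primary_ring; /* fall back on creation failure */

   return dev->secondary_ring;
}

int
main(void)
{
   static struct demo_ring primary = { .id = 1 };
   struct demo_device dev = { .primary_ring = &primary };
   mtx_init(&dev.ring_mutex, mtx_plain);

   /* a tracked thread keeps the primary ring; an untracked one gets the
    * lazily created secondary ring */
   struct demo_ring *a = demo_get_target_ring(&dev, true);
   struct demo_ring *b = demo_get_target_ring(&dev, false);
   (void)a;
   (void)b;

   free(dev.secondary_ring);
   mtx_destroy(&dev.ring_mutex);
   return 0;
}

The real vn_get_target_ring() additionally calls vn_ring_wait_all() on the primary ring before returning the secondary one, so objects created asynchronously on the primary ring (pipeline caches, layouts, render passes) are guaranteed to exist in the renderer before the synchronous secondary-ring submission references them.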