From 479a1c405e46e16e2fe2d537a1dbfe7eea0f57fd Mon Sep 17 00:00:00 2001 From: Hyunjun Ko Date: Wed, 12 Jan 2022 02:11:13 +0000 Subject: [PATCH] turnip: Port to the common Vulkan implementation for synchronization. This patch ports turnip to the common code for VkSemaphore, VkFence and related APIs such as vkCreate(Destroy)Semaphore/Fence, vkGetSemaphoreFdKHR, etc. Accordingly, it starts using the common vkQueueSubmit by implementing the driver-specific hook. Also remove all timeline semaphore code so that we can use the common code in the following patches. This way it is easy to see what is modified in the following patch. Note that kgsl is not ported in this patch. Signed-off-by: Hyunjun Ko Reviewed-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/tu_device.c | 32 +- src/freedreno/vulkan/tu_drm.c | 1198 +++---------------------- src/freedreno/vulkan/tu_kgsl.c | 15 +- src/freedreno/vulkan/tu_private.h | 20 +- src/freedreno/vulkan/tu_wsi.c | 8 +- src/freedreno/vulkan/tu_wsi_display.c | 18 +- 6 files changed, 156 insertions(+), 1135 deletions(-) diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index b86404f37ac..aaf8143f101 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -159,7 +159,7 @@ get_device_extensions(const struct tu_physical_device *device, .KHR_buffer_device_address = true, .KHR_shader_integer_dot_product = true, #ifndef TU_USE_KGSL - .KHR_timeline_semaphore = true, + .KHR_timeline_semaphore = false, #endif #ifdef VK_USE_PLATFORM_DISPLAY_KHR /* This extension is supported by common code across drivers, but it is @@ -562,7 +562,7 @@ tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice, features->shaderSubgroupExtendedTypes = true; features->separateDepthStencilLayouts = true; features->hostQueryReset = true; - features->timelineSemaphore = true; + features->timelineSemaphore = false; features->bufferDeviceAddress = true; features->bufferDeviceAddressCaptureReplay = false; 
features->bufferDeviceAddressMultiDevice = false; @@ -757,7 +757,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: { VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features = (VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext; - features->timelineSemaphore = true; + features->timelineSemaphore = false; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: { @@ -1328,8 +1328,9 @@ tu_queue_init(struct tu_device *device, return result; queue->device = device; - - list_inithead(&queue->queued_submits); +#ifndef TU_USE_KGSL + queue->vk.driver_submit = tu_queue_submit; +#endif int ret = tu_drm_submitqueue_new(device, 0, &queue->msm_queue_id); if (ret) @@ -1573,6 +1574,10 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, mtx_init(&device->bo_mutex, mtx_plain); pthread_mutex_init(&device->submit_mutex, NULL); +#ifndef TU_USE_KGSL + vk_device_set_drm_fd(&device->vk, device->fd); +#endif + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; @@ -1871,6 +1876,8 @@ tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, return VK_SUCCESS; } +/* Only used for kgsl since drm started using common implementation */ +#ifdef TU_USE_KGSL VKAPI_ATTR VkResult VKAPI_CALL tu_QueueWaitIdle(VkQueue _queue) { @@ -1882,20 +1889,6 @@ tu_QueueWaitIdle(VkQueue _queue) if (queue->fence < 0) return VK_SUCCESS; - pthread_mutex_lock(&queue->device->submit_mutex); - - do { - tu_device_submit_deferred_locked(queue->device); - - if (list_is_empty(&queue->queued_submits)) - break; - - pthread_cond_wait(&queue->device->timeline_cond, - &queue->device->submit_mutex); - } while (!list_is_empty(&queue->queued_submits)); - - pthread_mutex_unlock(&queue->device->submit_mutex); - struct pollfd fds = { .fd = queue->fence, .events = POLLIN }; int ret; do { @@ -1909,6 +1902,7 @@ 
tu_QueueWaitIdle(VkQueue _queue) queue->fence = -1; return VK_SUCCESS; } +#endif VKAPI_ATTR VkResult VKAPI_CALL tu_EnumerateInstanceExtensionProperties(const char *pLayerName, diff --git a/src/freedreno/vulkan/tu_drm.c b/src/freedreno/vulkan/tu_drm.c index 7dc3aaa8dc5..577cc22164f 100644 --- a/src/freedreno/vulkan/tu_drm.c +++ b/src/freedreno/vulkan/tu_drm.c @@ -40,80 +40,19 @@ #include "tu_cs.h" -struct tu_binary_syncobj { - uint32_t permanent, temporary; -}; - -struct tu_timeline_point { - struct list_head link; - - uint64_t value; - uint32_t syncobj; - uint32_t wait_count; -}; - -struct tu_timeline { - uint64_t highest_submitted; - uint64_t highest_signaled; - - /* A timeline can have multiple timeline points */ - struct list_head points; - - /* A list containing points that has been already submited. - * A point will be moved to 'points' when new point is required - * at submit time. - */ - struct list_head free_points; -}; - -typedef enum { - TU_SEMAPHORE_BINARY, - TU_SEMAPHORE_TIMELINE, -} tu_semaphore_type; - - -struct tu_syncobj { - struct vk_object_base base; - - tu_semaphore_type type; - union { - struct tu_binary_syncobj binary; - struct tu_timeline timeline; - }; -}; - struct tu_queue_submit { - struct list_head link; - - VkCommandBuffer *cmd_buffers; + struct vk_queue_submit *vk_submit; struct tu_u_trace_cmd_data *cmd_buffer_trace_data; - uint32_t cmd_buffer_count; - struct tu_syncobj **wait_semaphores; - uint32_t wait_semaphore_count; - struct tu_syncobj **signal_semaphores; - uint32_t signal_semaphore_count; + struct drm_msm_gem_submit_cmd *cmds; + struct drm_msm_gem_submit_syncobj *in_syncobjs; + struct drm_msm_gem_submit_syncobj *out_syncobjs; - struct tu_syncobj **wait_timelines; - uint64_t *wait_timeline_values; - uint32_t wait_timeline_count; - uint32_t wait_timeline_array_length; - - struct tu_syncobj **signal_timelines; - uint64_t *signal_timeline_values; - uint32_t signal_timeline_count; - uint32_t signal_timeline_array_length; - - struct 
drm_msm_gem_submit_cmd *cmds; - struct drm_msm_gem_submit_syncobj *in_syncobjs; uint32_t nr_in_syncobjs; - struct drm_msm_gem_submit_syncobj *out_syncobjs; uint32_t nr_out_syncobjs; - - bool last_submit; uint32_t entry_count; - uint32_t counter_pass_index; + uint32_t perf_pass_index; }; struct tu_u_trace_syncobj @@ -487,11 +426,18 @@ tu_drm_device_init(struct tu_physical_device *device, goto fail; } + device->syncobj_type = vk_drm_syncobj_get_type(fd); + + device->sync_types[0] = &device->syncobj_type; + device->sync_types[1] = NULL; + device->heap.size = tu_get_system_heap_size(); device->heap.used = 0u; device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; result = tu_physical_device_init(device, instance); + device->vk.supported_sync_types = device->sync_types; + if (result == VK_SUCCESS) return result; @@ -543,421 +489,34 @@ tu_enumerate_devices(struct tu_instance *instance) return result; } -static void -tu_timeline_finish(struct tu_device *device, - struct tu_timeline *timeline) -{ - list_for_each_entry_safe(struct tu_timeline_point, point, - &timeline->free_points, link) { - list_del(&point->link); - drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, - &(struct drm_syncobj_destroy) { .handle = point->syncobj }); - - vk_free(&device->vk.alloc, point); - } - list_for_each_entry_safe(struct tu_timeline_point, point, - &timeline->points, link) { - list_del(&point->link); - drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, - &(struct drm_syncobj_destroy) { .handle = point->syncobj }); - vk_free(&device->vk.alloc, point); - } -} - -static VkResult -sync_create(VkDevice _device, - bool signaled, - bool fence, - bool binary, - uint64_t timeline_value, - const VkAllocationCallbacks *pAllocator, - void **p_sync) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - struct tu_syncobj *sync = - vk_object_alloc(&device->vk, pAllocator, sizeof(*sync), - fence ? 
VK_OBJECT_TYPE_FENCE : VK_OBJECT_TYPE_SEMAPHORE); - if (!sync) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - if (binary) { - struct drm_syncobj_create create = {}; - if (signaled) - create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED; - - int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create); - if (ret) { - vk_free2(&device->vk.alloc, pAllocator, sync); - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - sync->binary.permanent = create.handle; - sync->binary.temporary = 0; - sync->type = TU_SEMAPHORE_BINARY; - } else { - sync->type = TU_SEMAPHORE_TIMELINE; - sync->timeline.highest_signaled = sync->timeline.highest_submitted = - timeline_value; - list_inithead(&sync->timeline.points); - list_inithead(&sync->timeline.free_points); - } - - *p_sync = sync; - - return VK_SUCCESS; -} - -static void -sync_set_temporary(struct tu_device *device, struct tu_syncobj *sync, uint32_t syncobj) -{ - if (sync->binary.temporary) { - drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, - &(struct drm_syncobj_destroy) { .handle = sync->binary.temporary }); - } - sync->binary.temporary = syncobj; -} - -static void -sync_destroy(VkDevice _device, struct tu_syncobj *sync, const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - if (!sync) - return; - - if (sync->type == TU_SEMAPHORE_BINARY) { - sync_set_temporary(device, sync, 0); - drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, - &(struct drm_syncobj_destroy) { .handle = sync->binary.permanent }); - } else { - tu_timeline_finish(device, &sync->timeline); - } - - vk_object_free(&device->vk, pAllocator, sync); -} - -static VkResult -sync_import(VkDevice _device, struct tu_syncobj *sync, bool temporary, bool sync_fd, int fd) -{ - TU_FROM_HANDLE(tu_device, device, _device); - int ret; - - if (!sync_fd) { - uint32_t *dst = temporary ? 
&sync->binary.temporary : &sync->binary.permanent; - - struct drm_syncobj_handle handle = { .fd = fd }; - ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle); - if (ret) - return VK_ERROR_INVALID_EXTERNAL_HANDLE; - - if (*dst) { - drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, - &(struct drm_syncobj_destroy) { .handle = *dst }); - } - *dst = handle.handle; - close(fd); - } else { - /* Note: SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT is always temporary, but the - * user doesn't have to specify the temporary bit because that's only - * needed for choosing a permanence when there's an option. - * - * "VK_SEMAPHORE_IMPORT_TEMPORARY_BIT specifies that the semaphore payload - * will be imported only temporarily, as described in Importing Semaphore - * Payloads, regardless of the permanence of handleType" - * - * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2701 - */ - - struct drm_syncobj_create create = {}; - - if (fd == -1) - create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED; - - ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create); - if (ret) - return VK_ERROR_INVALID_EXTERNAL_HANDLE; - - if (fd != -1) { - ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &(struct drm_syncobj_handle) { - .fd = fd, - .handle = create.handle, - .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE, - }); - if (ret) { - drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, - &(struct drm_syncobj_destroy) { .handle = create.handle }); - return VK_ERROR_INVALID_EXTERNAL_HANDLE; - } - close(fd); - } - - sync_set_temporary(device, sync, create.handle); - } - - return VK_SUCCESS; -} - -static VkResult -sync_export(VkDevice _device, struct tu_syncobj *sync, bool sync_fd, int *p_fd) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - struct drm_syncobj_handle handle = { - .handle = sync->binary.temporary ?: sync->binary.permanent, - .flags = COND(sync_fd, DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE), - .fd = -1, - }; - int ret = drmIoctl(device->fd, 
DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle); - if (ret) - return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); - - /* restore permanent payload on export */ - sync_set_temporary(device, sync, 0); - - *p_fd = handle.fd; - return VK_SUCCESS; -} - -static VkSemaphoreTypeKHR -get_semaphore_type(const void *pNext, uint64_t *initial_value) -{ - const VkSemaphoreTypeCreateInfoKHR *type_info = - vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR); - - if (!type_info) - return VK_SEMAPHORE_TYPE_BINARY_KHR; - - if (initial_value) - *initial_value = type_info->initialValue; - return type_info->semaphoreType; -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_CreateSemaphore(VkDevice device, - const VkSemaphoreCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSemaphore *pSemaphore) -{ - uint64_t timeline_value = 0; - VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value); - - return sync_create(device, false, false, (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR), - timeline_value, pAllocator, (void**) pSemaphore); -} - -VKAPI_ATTR void VKAPI_CALL -tu_DestroySemaphore(VkDevice device, VkSemaphore sem, const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_syncobj, sync, sem); - sync_destroy(device, sync, pAllocator); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_ImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR *info) -{ - TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore); - return sync_import(device, sync, info->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, - info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, info->fd); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_GetSemaphoreFdKHR(VkDevice device, const VkSemaphoreGetFdInfoKHR *info, int *pFd) -{ - TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore); - return sync_export(device, sync, - info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, pFd); -} - -VKAPI_ATTR void VKAPI_CALL -tu_GetPhysicalDeviceExternalSemaphoreProperties( - 
VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, - VkExternalSemaphoreProperties *pExternalSemaphoreProperties) -{ - VkSemaphoreTypeKHR type = get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL); - - if (type != VK_SEMAPHORE_TYPE_TIMELINE && - (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || - pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT )) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; - pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; - pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; - } else { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; - pExternalSemaphoreProperties->compatibleHandleTypes = 0; - pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; - } -} - -static VkResult -tu_queue_submit_add_timeline_wait_locked(struct tu_queue_submit* submit, - struct tu_device *device, - struct tu_syncobj *timeline, - uint64_t value) -{ - if (submit->wait_timeline_count >= submit->wait_timeline_array_length) { - uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64); - - submit->wait_timelines = vk_realloc(&device->vk.alloc, - submit->wait_timelines, - new_len * sizeof(*submit->wait_timelines), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (submit->wait_timelines == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - submit->wait_timeline_values = vk_realloc(&device->vk.alloc, - submit->wait_timeline_values, - new_len * sizeof(*submit->wait_timeline_values), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (submit->wait_timeline_values == NULL) { - vk_free(&device->vk.alloc, 
submit->wait_timelines); - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - submit->wait_timeline_array_length = new_len; - } - - submit->wait_timelines[submit->wait_timeline_count] = timeline; - submit->wait_timeline_values[submit->wait_timeline_count] = value; - - submit->wait_timeline_count++; - - return VK_SUCCESS; -} - -static VkResult -tu_queue_submit_add_timeline_signal_locked(struct tu_queue_submit* submit, - struct tu_device *device, - struct tu_syncobj *timeline, - uint64_t value) -{ - if (submit->signal_timeline_count >= submit->signal_timeline_array_length) { - uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 32); - - submit->signal_timelines = vk_realloc(&device->vk.alloc, - submit->signal_timelines, - new_len * sizeof(*submit->signal_timelines), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (submit->signal_timelines == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - submit->signal_timeline_values = vk_realloc(&device->vk.alloc, - submit->signal_timeline_values, - new_len * sizeof(*submit->signal_timeline_values), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (submit->signal_timeline_values == NULL) { - vk_free(&device->vk.alloc, submit->signal_timelines); - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - submit->signal_timeline_array_length = new_len; - } - - submit->signal_timelines[submit->signal_timeline_count] = timeline; - submit->signal_timeline_values[submit->signal_timeline_count] = value; - - submit->signal_timeline_count++; - - return VK_SUCCESS; -} - static VkResult tu_queue_submit_create_locked(struct tu_queue *queue, - const VkSubmitInfo *submit_info, + struct vk_queue_submit *vk_submit, const uint32_t nr_in_syncobjs, const uint32_t nr_out_syncobjs, - const bool last_submit, - const VkPerformanceQuerySubmitInfoKHR *perf_info, + uint32_t perf_pass_index, struct tu_queue_submit **submit) { VkResult result; - const VkTimelineSemaphoreSubmitInfoKHR *timeline_info = - vk_find_struct_const(submit_info->pNext, - 
TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR); - - const uint32_t wait_values_count = - timeline_info ? timeline_info->waitSemaphoreValueCount : 0; - const uint32_t signal_values_count = - timeline_info ? timeline_info->signalSemaphoreValueCount : 0; - - const uint64_t *wait_values = - wait_values_count ? timeline_info->pWaitSemaphoreValues : NULL; - const uint64_t *signal_values = - signal_values_count ? timeline_info->pSignalSemaphoreValues : NULL; - struct tu_queue_submit *new_submit = vk_zalloc(&queue->device->vk.alloc, sizeof(*new_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - new_submit->cmd_buffer_count = submit_info->commandBufferCount; - new_submit->cmd_buffers = vk_zalloc(&queue->device->vk.alloc, - new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (new_submit->cmd_buffers == NULL) { + if (new_submit == NULL) { result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_cmd_buffers; - } - - memcpy(new_submit->cmd_buffers, submit_info->pCommandBuffers, - new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers)); - - new_submit->wait_semaphores = vk_zalloc(&queue->device->vk.alloc, - submit_info->waitSemaphoreCount * sizeof(*new_submit->wait_semaphores), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (new_submit->wait_semaphores == NULL) { - result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_wait_semaphores; - } - new_submit->wait_semaphore_count = submit_info->waitSemaphoreCount; - - new_submit->signal_semaphores = vk_zalloc(&queue->device->vk.alloc, - submit_info->signalSemaphoreCount *sizeof(*new_submit->signal_semaphores), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (new_submit->signal_semaphores == NULL) { - result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_signal_semaphores; - } - new_submit->signal_semaphore_count = submit_info->signalSemaphoreCount; - - for (uint32_t i = 0; i < submit_info->waitSemaphoreCount; i++) { - TU_FROM_HANDLE(tu_syncobj, 
sem, submit_info->pWaitSemaphores[i]); - new_submit->wait_semaphores[i] = sem; - - if (sem->type == TU_SEMAPHORE_TIMELINE) { - result = tu_queue_submit_add_timeline_wait_locked(new_submit, - queue->device, sem, wait_values[i]); - if (result != VK_SUCCESS) - goto fail_wait_timelines; - } - } - - for (uint32_t i = 0; i < submit_info->signalSemaphoreCount; i++) { - TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pSignalSemaphores[i]); - new_submit->signal_semaphores[i] = sem; - - if (sem->type == TU_SEMAPHORE_TIMELINE) { - result = tu_queue_submit_add_timeline_signal_locked(new_submit, - queue->device, sem, signal_values[i]); - if (result != VK_SUCCESS) - goto fail_signal_timelines; - } + goto fail_new_submit; } bool u_trace_enabled = u_trace_context_actively_tracing(&queue->device->trace_context); bool has_trace_points = false; - uint32_t entry_count = 0; - for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) { - TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]); + struct vk_command_buffer **vk_cmd_buffers = vk_submit->command_buffers; + struct tu_cmd_buffer **cmd_buffers = (void *)vk_cmd_buffers; - if (perf_info) + uint32_t entry_count = 0; + for (uint32_t j = 0; j < vk_submit->command_buffer_count; ++j) { + struct tu_cmd_buffer *cmdbuf = cmd_buffers[j]; + + if (perf_pass_index != ~0) entry_count++; entry_count += cmdbuf->cs.entry_count; @@ -981,16 +540,16 @@ tu_queue_submit_create_locked(struct tu_queue *queue, if (has_trace_points) { new_submit->cmd_buffer_trace_data = vk_zalloc(&queue->device->vk.alloc, - new_submit->cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + vk_submit->command_buffer_count * sizeof(struct tu_u_trace_cmd_data), + 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (new_submit->cmd_buffer_trace_data == NULL) { result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); goto fail_cmd_trace_data; } - for (uint32_t i = 0; i < new_submit->cmd_buffer_count; ++i) { - 
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[i]); + for (uint32_t i = 0; i < vk_submit->command_buffer_count; ++i) { + struct tu_cmd_buffer *cmdbuf = cmd_buffers[i]; if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) && u_trace_has_points(&cmdbuf->trace)) { @@ -1014,8 +573,8 @@ tu_queue_submit_create_locked(struct tu_queue *queue, /* Allocate without wait timeline semaphores */ new_submit->in_syncobjs = vk_zalloc(&queue->device->vk.alloc, - (nr_in_syncobjs - new_submit->wait_timeline_count) * - sizeof(*new_submit->in_syncobjs), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + nr_in_syncobjs * sizeof(*new_submit->in_syncobjs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (new_submit->in_syncobjs == NULL) { result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1035,10 +594,8 @@ tu_queue_submit_create_locked(struct tu_queue *queue, new_submit->entry_count = entry_count; new_submit->nr_in_syncobjs = nr_in_syncobjs; new_submit->nr_out_syncobjs = nr_out_syncobjs; - new_submit->last_submit = last_submit; - new_submit->counter_pass_index = perf_info ? 
perf_info->counterPassIndex : ~0; - - list_inithead(&new_submit->link); + new_submit->perf_pass_index = perf_pass_index; + new_submit->vk_submit = vk_submit; *submit = new_submit; @@ -1049,56 +606,35 @@ fail_out_syncobjs: fail_in_syncobjs: if (new_submit->cmd_buffer_trace_data) tu_u_trace_cmd_data_finish(queue->device, new_submit->cmd_buffer_trace_data, - new_submit->cmd_buffer_count); + new_submit->vk_submit->command_buffer_count); fail_copy_timestamp_cs: vk_free(&queue->device->vk.alloc, new_submit->cmd_buffer_trace_data); fail_cmd_trace_data: vk_free(&queue->device->vk.alloc, new_submit->cmds); fail_cmds: -fail_signal_timelines: -fail_wait_timelines: - vk_free(&queue->device->vk.alloc, new_submit->signal_semaphores); -fail_signal_semaphores: - vk_free(&queue->device->vk.alloc, new_submit->wait_semaphores); -fail_wait_semaphores: - vk_free(&queue->device->vk.alloc, new_submit->cmd_buffers); -fail_cmd_buffers: + vk_free(&queue->device->vk.alloc, new_submit); +fail_new_submit: return result; } -static void -tu_queue_submit_free(struct tu_queue *queue, struct tu_queue_submit *submit) -{ - vk_free(&queue->device->vk.alloc, submit->wait_semaphores); - vk_free(&queue->device->vk.alloc, submit->signal_semaphores); - - vk_free(&queue->device->vk.alloc, submit->wait_timelines); - vk_free(&queue->device->vk.alloc, submit->wait_timeline_values); - vk_free(&queue->device->vk.alloc, submit->signal_timelines); - vk_free(&queue->device->vk.alloc, submit->signal_timeline_values); - - vk_free(&queue->device->vk.alloc, submit->cmds); - vk_free(&queue->device->vk.alloc, submit->in_syncobjs); - vk_free(&queue->device->vk.alloc, submit->out_syncobjs); - vk_free(&queue->device->vk.alloc, submit->cmd_buffers); - vk_free(&queue->device->vk.alloc, submit); -} - static void tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue, struct tu_queue_submit *submit) { struct drm_msm_gem_submit_cmd *cmds = submit->cmds; - uint32_t entry_idx = 0; - for (uint32_t j = 0; j < 
submit->cmd_buffer_count; ++j) { - TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[j]); - struct tu_cs *cs = &cmdbuf->cs; - struct tu_device *dev = queue->device; + struct vk_command_buffer **vk_cmd_buffers = submit->vk_submit->command_buffers; + struct tu_cmd_buffer **cmd_buffers = (void *)vk_cmd_buffers; - if (submit->counter_pass_index != ~0) { + uint32_t entry_idx = 0; + for (uint32_t j = 0; j < submit->vk_submit->command_buffer_count; ++j) { + struct tu_device *dev = queue->device; + struct tu_cmd_buffer *cmdbuf = cmd_buffers[j]; + struct tu_cs *cs = &cmdbuf->cs; + + if (submit->perf_pass_index != ~0) { struct tu_cs_entry *perf_cs_entry = - &dev->perfcntrs_pass_cs_entries[submit->counter_pass_index]; + &dev->perfcntrs_pass_cs_entries[submit->perf_pass_index]; cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF; cmds[entry_idx].submit_idx = @@ -1151,15 +687,12 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit) uint32_t flags = MSM_PIPE_3D0; - if (submit->nr_in_syncobjs) + if (submit->vk_submit->wait_count) flags |= MSM_SUBMIT_SYNCOBJ_IN; - if (submit->nr_out_syncobjs) + if (submit->vk_submit->signal_count) flags |= MSM_SUBMIT_SYNCOBJ_OUT; - if (submit->last_submit) - flags |= MSM_SUBMIT_FENCE_FD_OUT; - mtx_lock(&queue->device->bo_mutex); /* drm_msm_gem_submit_cmd requires index of bo which could change at any @@ -1177,7 +710,7 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit) .nr_cmds = submit->entry_count, .in_syncobjs = (uint64_t)(uintptr_t)submit->in_syncobjs, .out_syncobjs = (uint64_t)(uintptr_t)submit->out_syncobjs, - .nr_in_syncobjs = submit->nr_in_syncobjs - submit->wait_timeline_count, + .nr_in_syncobjs = submit->nr_in_syncobjs, .nr_out_syncobjs = submit->nr_out_syncobjs, .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj), }; @@ -1189,31 +722,7 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit) mtx_unlock(&queue->device->bo_mutex); if (ret) - return 
vk_device_set_lost(&queue->device->vk, "submit failed: %s\n", - strerror(errno)); - - /* restore permanent payload on wait */ - for (uint32_t i = 0; i < submit->wait_semaphore_count; i++) { - TU_FROM_HANDLE(tu_syncobj, sem, submit->wait_semaphores[i]); - if(sem->type == TU_SEMAPHORE_BINARY) - sync_set_temporary(queue->device, sem, 0); - } - - if (submit->last_submit) { - if (queue->fence >= 0) - close(queue->fence); - queue->fence = req.fence_fd; - } - - /* Update highest_submitted values in the timeline. */ - for (uint32_t i = 0; i < submit->signal_timeline_count; i++) { - struct tu_syncobj *sem = submit->signal_timelines[i]; - uint64_t signal_value = submit->signal_timeline_values[i]; - - assert(signal_value > sem->timeline.highest_submitted); - - sem->timeline.highest_submitted = signal_value; - } + return vk_device_set_lost(&queue->device->vk, "submit failed: %m"); if (submit->cmd_buffer_trace_data) { struct tu_u_trace_flush_data *flush_data = @@ -1227,143 +736,18 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit) flush_data->syncobj->msm_queue_id = queue->msm_queue_id; flush_data->cmd_trace_data = submit->cmd_buffer_trace_data; - flush_data->trace_count = submit->cmd_buffer_count; + flush_data->trace_count = submit->vk_submit->command_buffer_count; submit->cmd_buffer_trace_data = NULL; - for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) { - bool free_data = i == (submit->cmd_buffer_count - 1); + for (uint32_t i = 0; i < submit->vk_submit->command_buffer_count; i++) { + bool free_data = i == (submit->vk_submit->command_buffer_count - 1); u_trace_flush(flush_data->cmd_trace_data[i].trace, flush_data, free_data); } } - pthread_cond_broadcast(&queue->device->timeline_cond); - return VK_SUCCESS; } - -static bool -tu_queue_submit_ready_locked(struct tu_queue_submit *submit) -{ - for (uint32_t i = 0; i < submit->wait_timeline_count; i++) { - if (submit->wait_timeline_values[i] > - 
submit->wait_timelines[i]->timeline.highest_submitted) { - return false; - } - } - - return true; -} - -static VkResult -tu_timeline_add_point_locked(struct tu_device *device, - struct tu_timeline *timeline, - uint64_t value, - struct tu_timeline_point **point) -{ - - if (list_is_empty(&timeline->free_points)) { - *point = vk_zalloc(&device->vk.alloc, sizeof(**point), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (!(*point)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - struct drm_syncobj_create create = {}; - - int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create); - if (ret) { - vk_free(&device->vk.alloc, *point); - return vk_error(device, VK_ERROR_DEVICE_LOST); - } - - (*point)->syncobj = create.handle; - - } else { - *point = list_first_entry(&timeline->free_points, - struct tu_timeline_point, link); - list_del(&(*point)->link); - } - - (*point)->value = value; - list_addtail(&(*point)->link, &timeline->points); - - return VK_SUCCESS; -} - -static VkResult -tu_queue_submit_timeline_locked(struct tu_queue *queue, - struct tu_queue_submit *submit) -{ - VkResult result; - uint32_t timeline_idx = - submit->nr_out_syncobjs - submit->signal_timeline_count; - - for (uint32_t i = 0; i < submit->signal_timeline_count; i++) { - struct tu_timeline *timeline = &submit->signal_timelines[i]->timeline; - uint64_t signal_value = submit->signal_timeline_values[i]; - struct tu_timeline_point *point; - - result = tu_timeline_add_point_locked(queue->device, timeline, - signal_value, &point); - if (result != VK_SUCCESS) - return result; - - submit->out_syncobjs[timeline_idx + i] = - (struct drm_msm_gem_submit_syncobj) { - .handle = point->syncobj, - .flags = 0, - }; - } - - return tu_queue_submit_locked(queue, submit); -} - -static VkResult -tu_queue_submit_deferred_locked(struct tu_queue *queue, uint32_t *advance) -{ - VkResult result = VK_SUCCESS; - - list_for_each_entry_safe(struct tu_queue_submit, submit, - &queue->queued_submits, link) { - if 
(!tu_queue_submit_ready_locked(submit)) - break; - - (*advance)++; - - result = tu_queue_submit_timeline_locked(queue, submit); - - list_del(&submit->link); - tu_queue_submit_free(queue, submit); - - if (result != VK_SUCCESS) - break; - } - - return result; -} - -VkResult -tu_device_submit_deferred_locked(struct tu_device *dev) -{ - VkResult result = VK_SUCCESS; - - uint32_t advance = 0; - do { - advance = 0; - for (uint32_t i = 0; i < dev->queue_count[0]; i++) { - /* Try again if there's signaled submission. */ - result = tu_queue_submit_deferred_locked(&dev->queues[0][i], - &advance); - if (result != VK_SUCCESS) - return result; - } - - } while(advance); - - return result; -} - static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) { @@ -1372,7 +756,6 @@ get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) tv->tv_sec = t.tv_sec + ns / 1000000000; tv->tv_nsec = t.tv_nsec + ns % 1000000000; } - VkResult tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj) { @@ -1393,453 +776,90 @@ tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj return VK_SUCCESS; } -VKAPI_ATTR VkResult VKAPI_CALL -tu_QueueSubmit(VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo *pSubmits, - VkFence _fence) +VkResult +tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit) { - TU_FROM_HANDLE(tu_queue, queue, _queue); - TU_FROM_HANDLE(tu_syncobj, fence, _fence); + struct tu_queue *queue = container_of(vk_queue, struct tu_queue, vk); + uint32_t perf_pass_index = queue->device->perfcntrs_pass_cs ? 
+ submit->perf_pass_index : ~0; + struct tu_queue_submit *submit_req = NULL; - for (uint32_t i = 0; i < submitCount; ++i) { - const VkSubmitInfo *submit = pSubmits + i; - const bool last_submit = (i == submitCount - 1); - uint32_t out_syncobjs_size = submit->signalSemaphoreCount; + pthread_mutex_lock(&queue->device->submit_mutex); - const VkPerformanceQuerySubmitInfoKHR *perf_info = - vk_find_struct_const(pSubmits[i].pNext, - PERFORMANCE_QUERY_SUBMIT_INFO_KHR); + VkResult ret = tu_queue_submit_create_locked(queue, submit, + submit->wait_count, submit->signal_count, + perf_pass_index, &submit_req); - if (last_submit && fence) - out_syncobjs_size += 1; + if (ret != VK_SUCCESS) { + pthread_mutex_unlock(&queue->device->submit_mutex); + return ret; + } - pthread_mutex_lock(&queue->device->submit_mutex); - struct tu_queue_submit *submit_req = NULL; + /* note: assuming there won't be any very large semaphore counts */ + struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req->in_syncobjs; + struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req->out_syncobjs; - VkResult ret = tu_queue_submit_create_locked(queue, submit, - submit->waitSemaphoreCount, out_syncobjs_size, - last_submit, perf_info, &submit_req); + uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0; - if (ret != VK_SUCCESS) { - pthread_mutex_unlock(&queue->device->submit_mutex); - return ret; - } + for (uint32_t i = 0; i < submit->wait_count; i++) { + struct vk_sync *sync = submit->waits[i].sync; - /* note: assuming there won't be any very large semaphore counts */ - struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req->in_syncobjs; - struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req->out_syncobjs; - uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0; - - for (uint32_t i = 0; i < submit->waitSemaphoreCount; i++) { - TU_FROM_HANDLE(tu_syncobj, sem, submit->pWaitSemaphores[i]); - if (sem->type == TU_SEMAPHORE_TIMELINE) - continue; + if (vk_sync_type_is_drm_syncobj(sync->type)) { + struct 
vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync); in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) { - .handle = sem->binary.temporary ?: sem->binary.permanent, - .flags = MSM_SUBMIT_SYNCOBJ_RESET, - }; - } - - for (uint32_t i = 0; i < submit->signalSemaphoreCount; i++) { - TU_FROM_HANDLE(tu_syncobj, sem, submit->pSignalSemaphores[i]); - - /* In case of timeline semaphores, we can defer the creation of syncobj - * and adding it at real submit time. - */ - if (sem->type == TU_SEMAPHORE_TIMELINE) - continue; - - out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) { - .handle = sem->binary.temporary ?: sem->binary.permanent, + .handle = syncobj->syncobj, .flags = 0, }; } + } + + for (uint32_t i = 0; i < submit->signal_count; i++) { + struct vk_sync *sync = submit->signals[i].sync; + + if (vk_sync_type_is_drm_syncobj(sync->type)) { + struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync); - if (last_submit && fence) { out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) { - .handle = fence->binary.temporary ?: fence->binary.permanent, + .handle = syncobj->syncobj, .flags = 0, }; } + } - /* Queue the current submit */ - list_addtail(&submit_req->link, &queue->queued_submits); - ret = tu_device_submit_deferred_locked(queue->device); + ret = tu_queue_submit_locked(queue, submit_req); + + pthread_mutex_unlock(&queue->device->submit_mutex); + if (ret != VK_SUCCESS) + return ret; + + return VK_SUCCESS; +} + +VkResult +tu_signal_syncs(struct tu_device *device, + struct vk_sync *sync1, struct vk_sync *sync2) +{ + VkResult ret = VK_SUCCESS; + + if (sync1) { + ret = vk_sync_signal(&device->vk, sync1, 0); - pthread_mutex_unlock(&queue->device->submit_mutex); if (ret != VK_SUCCESS) - return ret; + return ret; } - if (!submitCount && fence) { - /* signal fence imemediately since we don't have a submit to do it */ - drmIoctl(queue->device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) { - .handles = 
(uintptr_t) (uint32_t[]) { fence->binary.temporary ?: fence->binary.permanent }, - .count_handles = 1, - }); - } + if (sync2) + ret = vk_sync_signal(&device->vk, sync2, 0); - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_CreateFence(VkDevice device, - const VkFenceCreateInfo *info, - const VkAllocationCallbacks *pAllocator, - VkFence *pFence) -{ - return sync_create(device, info->flags & VK_FENCE_CREATE_SIGNALED_BIT, true, true, 0, - pAllocator, (void**) pFence); -} - -VKAPI_ATTR void VKAPI_CALL -tu_DestroyFence(VkDevice device, VkFence fence, const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_syncobj, sync, fence); - sync_destroy(device, sync, pAllocator); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_ImportFenceFdKHR(VkDevice device, const VkImportFenceFdInfoKHR *info) -{ - TU_FROM_HANDLE(tu_syncobj, sync, info->fence); - return sync_import(device, sync, info->flags & VK_FENCE_IMPORT_TEMPORARY_BIT, - info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, info->fd); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_GetFenceFdKHR(VkDevice device, const VkFenceGetFdInfoKHR *info, int *pFd) -{ - TU_FROM_HANDLE(tu_syncobj, sync, info->fence); - return sync_export(device, sync, - info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, pFd); -} - -static VkResult -drm_syncobj_wait(struct tu_device *device, - const uint32_t *handles, uint32_t count_handles, - int64_t timeout_nsec, bool wait_all) -{ - int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &(struct drm_syncobj_wait) { - .handles = (uint64_t) (uintptr_t) handles, - .count_handles = count_handles, - .timeout_nsec = timeout_nsec, - .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | - COND(wait_all, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) - }); - if (ret) { - if (errno == ETIME) - return VK_TIMEOUT; - - assert(0); - return VK_ERROR_DEVICE_LOST; /* TODO */ - } - return VK_SUCCESS; -} - -static uint64_t -gettime_ns(void) -{ - struct timespec current; - clock_gettime(CLOCK_MONOTONIC, ¤t); - 
return (uint64_t)current.tv_sec * 1000000000 + current.tv_nsec; -} - -/* and the kernel converts it right back to relative timeout - very smart UAPI */ -static uint64_t -absolute_timeout(uint64_t timeout) -{ - if (timeout == 0) - return 0; - uint64_t current_time = gettime_ns(); - uint64_t max_timeout = (uint64_t) INT64_MAX - current_time; - - timeout = MIN2(max_timeout, timeout); - - return (current_time + timeout); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_WaitForFences(VkDevice _device, - uint32_t fenceCount, - const VkFence *pFences, - VkBool32 waitAll, - uint64_t timeout) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - if (vk_device_is_lost(&device->vk)) - return VK_ERROR_DEVICE_LOST; - - uint32_t handles[fenceCount]; - for (unsigned i = 0; i < fenceCount; ++i) { - TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]); - handles[i] = fence->binary.temporary ?: fence->binary.permanent; - } - - return drm_syncobj_wait(device, handles, fenceCount, absolute_timeout(timeout), waitAll); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences) -{ - TU_FROM_HANDLE(tu_device, device, _device); - int ret; - - uint32_t handles[fenceCount]; - for (unsigned i = 0; i < fenceCount; ++i) { - TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]); - sync_set_temporary(device, fence, 0); - handles[i] = fence->binary.permanent; - } - - ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_RESET, &(struct drm_syncobj_array) { - .handles = (uint64_t) (uintptr_t) handles, - .count_handles = fenceCount, - }); - if (ret) { - vk_device_set_lost(&device->vk, "DRM_IOCTL_SYNCOBJ_RESET failure: %s", - strerror(errno)); - } - - return VK_SUCCESS; -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_GetFenceStatus(VkDevice _device, VkFence _fence) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_syncobj, fence, _fence); - VkResult result; - - result = drm_syncobj_wait(device, (uint32_t[]){fence->binary.temporary ?: fence->binary.permanent}, 1, 0, 
false); - if (result == VK_TIMEOUT) - result = VK_NOT_READY; - return result; + return ret; } int -tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2) +tu_syncobj_to_fd(struct tu_device *device, struct vk_sync *sync) { - uint32_t handles[2], count = 0; - if (fence1) - handles[count++] = fence1->binary.temporary ?: fence1->binary.permanent; - - if (fence2) - handles[count++] = fence2->binary.temporary ?: fence2->binary.permanent; - - if (!count) - return 0; - - return drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) { - .handles = (uintptr_t) handles, - .count_handles = count - }); -} - -int -tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync) -{ - struct drm_syncobj_handle handle = { .handle = sync->binary.permanent }; - int ret; - - ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle); - - return ret ? -1 : handle.fd; -} - -static VkResult -tu_timeline_gc_locked(struct tu_device *dev, struct tu_timeline *timeline) -{ - VkResult result = VK_SUCCESS; - - /* Go through every point in the timeline and check if any signaled point */ - list_for_each_entry_safe(struct tu_timeline_point, point, - &timeline->points, link) { - - /* If the value of the point is higher than highest_submitted, - * the point has not been submited yet. - */ - if (point->wait_count || point->value > timeline->highest_submitted) - return VK_SUCCESS; - - result = drm_syncobj_wait(dev, (uint32_t[]){point->syncobj}, 1, 0, true); - - if (result == VK_TIMEOUT) { - /* This means the syncobj is still busy and it should wait - * with timeout specified by users via vkWaitSemaphores. 
- */ - result = VK_SUCCESS; - } else { - timeline->highest_signaled = - MAX2(timeline->highest_signaled, point->value); - list_del(&point->link); - list_add(&point->link, &timeline->free_points); - } - } - - return result; -} - - -static VkResult -tu_timeline_wait_locked(struct tu_device *device, - struct tu_timeline *timeline, - uint64_t value, - uint64_t abs_timeout) -{ - VkResult result; - - while(timeline->highest_submitted < value) { - struct timespec abstime; - timespec_from_nsec(&abstime, abs_timeout); - - pthread_cond_timedwait(&device->timeline_cond, &device->submit_mutex, - &abstime); - - if (os_time_get_nano() >= abs_timeout && - timeline->highest_submitted < value) - return VK_TIMEOUT; - } - - /* Visit every point in the timeline and wait until - * the highest_signaled reaches the value. - */ - while (1) { - result = tu_timeline_gc_locked(device, timeline); - if (result != VK_SUCCESS) - return result; - - if (timeline->highest_signaled >= value) - return VK_SUCCESS; - - struct tu_timeline_point *point = - list_first_entry(&timeline->points, - struct tu_timeline_point, link); - - point->wait_count++; - pthread_mutex_unlock(&device->submit_mutex); - result = drm_syncobj_wait(device, (uint32_t[]){point->syncobj}, 1, - abs_timeout, true); - - pthread_mutex_lock(&device->submit_mutex); - point->wait_count--; - - if (result != VK_SUCCESS) - return result; - } - - return result; -} - -static VkResult -tu_wait_timelines(struct tu_device *device, - const VkSemaphoreWaitInfoKHR* pWaitInfo, - uint64_t abs_timeout) -{ - if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && - pWaitInfo->semaphoreCount > 1) { - pthread_mutex_lock(&device->submit_mutex); - - /* Visit every timline semaphore in the queue until timeout */ - while (1) { - for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) { - TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]); - VkResult result = tu_timeline_wait_locked(device, - &semaphore->timeline, pWaitInfo->pValues[i], 0); - - 
/* Returns result values including VK_SUCCESS except for VK_TIMEOUT */ - if (result != VK_TIMEOUT) { - pthread_mutex_unlock(&device->submit_mutex); - return result; - } - } - - if (os_time_get_nano() > abs_timeout) { - pthread_mutex_unlock(&device->submit_mutex); - return VK_TIMEOUT; - } - } - } else { - VkResult result = VK_SUCCESS; - - pthread_mutex_lock(&device->submit_mutex); - for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) { - TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]); - assert(semaphore->type == TU_SEMAPHORE_TIMELINE); - - result = tu_timeline_wait_locked(device, &semaphore->timeline, - pWaitInfo->pValues[i], abs_timeout); - if (result != VK_SUCCESS) - break; - } - pthread_mutex_unlock(&device->submit_mutex); - - return result; - } -} - - -VKAPI_ATTR VkResult VKAPI_CALL -tu_GetSemaphoreCounterValue(VkDevice _device, - VkSemaphore _semaphore, - uint64_t* pValue) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_syncobj, semaphore, _semaphore); - - assert(semaphore->type == TU_SEMAPHORE_TIMELINE); - - VkResult result; - - pthread_mutex_lock(&device->submit_mutex); - - result = tu_timeline_gc_locked(device, &semaphore->timeline); - *pValue = semaphore->timeline.highest_signaled; - - pthread_mutex_unlock(&device->submit_mutex); - - return result; -} - - -VKAPI_ATTR VkResult VKAPI_CALL -tu_WaitSemaphores(VkDevice _device, - const VkSemaphoreWaitInfoKHR* pWaitInfo, - uint64_t timeout) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - return tu_wait_timelines(device, pWaitInfo, absolute_timeout(timeout)); -} - -VKAPI_ATTR VkResult VKAPI_CALL -tu_SignalSemaphore(VkDevice _device, - const VkSemaphoreSignalInfoKHR* pSignalInfo) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_syncobj, semaphore, pSignalInfo->semaphore); - VkResult result; - - assert(semaphore->type == TU_SEMAPHORE_TIMELINE); - - pthread_mutex_lock(&device->submit_mutex); - - result = tu_timeline_gc_locked(device, 
&semaphore->timeline); - if (result != VK_SUCCESS) { - pthread_mutex_unlock(&device->submit_mutex); - return result; - } - - semaphore->timeline.highest_submitted = pSignalInfo->value; - semaphore->timeline.highest_signaled = pSignalInfo->value; - - result = tu_device_submit_deferred_locked(device); - - pthread_cond_broadcast(&device->timeline_cond); - pthread_mutex_unlock(&device->submit_mutex); - - return result; + VkResult ret; + int fd; + ret = vk_sync_export_opaque_fd(&device->vk, sync, &fd); + return ret ? -1 : fd; } diff --git a/src/freedreno/vulkan/tu_kgsl.c b/src/freedreno/vulkan/tu_kgsl.c index eaf23d04430..046cb3fe319 100644 --- a/src/freedreno/vulkan/tu_kgsl.c +++ b/src/freedreno/vulkan/tu_kgsl.c @@ -635,27 +635,20 @@ tu_GetFenceStatus(VkDevice _device, VkFence _fence) } int -tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2) +tu_signal_syncs(struct tu_device *device, + struct vk_sync *sync1, struct vk_sync *sync2) { - tu_finishme("tu_signal_fences"); + tu_finishme("tu_signal_syncs"); return 0; } int -tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync) +tu_syncobj_to_fd(struct tu_device *device, struct vk_sync *sync) { tu_finishme("tu_syncobj_to_fd"); return -1; } -VkResult -tu_device_submit_deferred_locked(struct tu_device *dev) -{ - tu_finishme("tu_device_submit_deferred_locked"); - - return VK_SUCCESS; -} - VkResult tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj) { diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index a71e5985f2e..b32b32f29d8 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -97,6 +97,11 @@ typedef uint32_t xcb_window_t; #include "vk_image.h" #include "vk_command_buffer.h" #include "vk_queue.h" +#include "vk_object.h" +#include "vk_sync.h" +#include "vk_fence.h" +#include "vk_semaphore.h" +#include "vk_drm_syncobj.h" #define MAX_VBS 32 #define MAX_VERTEX_ATTRIBS 32 
@@ -225,6 +230,9 @@ struct tu_physical_device struct disk_cache *disk_cache; struct tu_memory_heap heap; + + struct vk_sync_type syncobj_type; + const struct vk_sync_type *sync_types[3]; }; enum tu_debug_flags @@ -298,7 +306,10 @@ struct tu_pipeline_key #define TU_MAX_QUEUE_FAMILIES 1 +/* Keep tu_syncobj until porting to common code for kgsl too */ +#ifdef TU_USE_KGSL struct tu_syncobj; +#endif struct tu_u_trace_syncobj; struct tu_queue @@ -309,9 +320,6 @@ struct tu_queue uint32_t msm_queue_id; int fence; - - /* Queue containing deferred submits */ - struct list_head queued_submits; }; struct tu_bo @@ -1707,11 +1715,13 @@ void tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id); int -tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2); +tu_signal_syncs(struct tu_device *device, struct vk_sync *sync1, struct vk_sync *sync2); int -tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync); +tu_syncobj_to_fd(struct tu_device *device, struct vk_sync *sync); +VkResult +tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit); void tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream, diff --git a/src/freedreno/vulkan/tu_wsi.c b/src/freedreno/vulkan/tu_wsi.c index e7b0440f746..d8a0bc68b0c 100644 --- a/src/freedreno/vulkan/tu_wsi.c +++ b/src/freedreno/vulkan/tu_wsi.c @@ -71,8 +71,8 @@ tu_AcquireNextImage2KHR(VkDevice _device, uint32_t *pImageIndex) { TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_syncobj, fence, pAcquireInfo->fence); - TU_FROM_HANDLE(tu_syncobj, semaphore, pAcquireInfo->semaphore); + VK_FROM_HANDLE(vk_fence, fence, pAcquireInfo->fence); + VK_FROM_HANDLE(vk_semaphore, semaphore, pAcquireInfo->semaphore); struct tu_physical_device *pdevice = device->physical_device; @@ -80,7 +80,9 @@ tu_AcquireNextImage2KHR(VkDevice _device, &pdevice->wsi_device, _device, pAcquireInfo, pImageIndex); /* signal fence/semaphore - image is available 
immediately */ - tu_signal_fences(device, fence, semaphore); + tu_signal_syncs(device, + fence ? vk_fence_get_active_sync(fence) : NULL, + semaphore ? vk_semaphore_get_active_sync(semaphore) : NULL); return result; } diff --git a/src/freedreno/vulkan/tu_wsi_display.c b/src/freedreno/vulkan/tu_wsi_display.c index fce8d7a7d65..db51d45b281 100644 --- a/src/freedreno/vulkan/tu_wsi_display.c +++ b/src/freedreno/vulkan/tu_wsi_display.c @@ -33,6 +33,7 @@ #include "vk_format.h" #include "util/debug.h" #include "wsi_common_display.h" +#include "vulkan/runtime/vk_common_entrypoints.h" /* VK_EXT_display_control */ @@ -46,13 +47,13 @@ tu_RegisterDeviceEventEXT(VkDevice _device, VkResult ret; VkFence _fence; - ret = tu_CreateFence(_device, &(VkFenceCreateInfo) {}, allocator, &_fence); + ret = vk_common_CreateFence(_device, &(VkFenceCreateInfo) {}, allocator, &_fence); if (ret != VK_SUCCESS) return ret; - TU_FROM_HANDLE(tu_syncobj, fence, _fence); + VK_FROM_HANDLE(vk_fence, fence, _fence); - int sync_fd = tu_syncobj_to_fd(device, fence); + int sync_fd = tu_syncobj_to_fd(device, vk_fence_get_active_sync(fence)); if (sync_fd >= 0) { ret = wsi_register_device_event(_device, &device->physical_device->wsi_device, @@ -67,7 +68,7 @@ tu_RegisterDeviceEventEXT(VkDevice _device, } if (ret != VK_SUCCESS) - tu_DestroyFence(_device, _fence, allocator); + vk_common_DestroyFence(_device, _fence, allocator); else *out_fence = _fence; @@ -84,13 +85,14 @@ tu_RegisterDisplayEventEXT(VkDevice _device, TU_FROM_HANDLE(tu_device, device, _device); VkResult ret; - ret = tu_CreateFence(_device, &(VkFenceCreateInfo) {}, allocator, _fence); + ret = vk_common_CreateFence(_device, &(VkFenceCreateInfo) {}, allocator, _fence); if (ret != VK_SUCCESS) return ret; - TU_FROM_HANDLE(tu_syncobj, fence, *_fence); + VK_FROM_HANDLE(vk_fence, fence, *_fence); + + int sync_fd = tu_syncobj_to_fd(device, vk_fence_get_active_sync(fence)); - int sync_fd = tu_syncobj_to_fd(device, fence); if (sync_fd >= 0) { ret = 
wsi_register_display_event(_device, &device->physical_device->wsi_device, @@ -106,7 +108,7 @@ tu_RegisterDisplayEventEXT(VkDevice _device, } if (ret != VK_SUCCESS) - tu_DestroyFence(_device, *_fence, allocator); + vk_common_DestroyFence(_device, *_fence, allocator); return ret; }