diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 0f86685cbb0..12ceba62e86 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1281,11 +1281,48 @@ anv_queue_exec_locked(struct anv_queue *queue, uint32_t perf_query_pass) { struct anv_device *device = queue->device; - return device->kmd_backend->queue_exec_locked(queue, wait_count, waits, - cmd_buffer_count, - cmd_buffers, signal_count, - signals, perf_query_pool, - perf_query_pass); + VkResult result = VK_SUCCESS; + + /* We only need to synchronize the main & companion command buffers if we + * have a companion command buffer somewhere in the list of command + * buffers. + */ + bool needs_companion_sync = false; + for (uint32_t i = 0; i < cmd_buffer_count; i++) { + if (cmd_buffers[i]->companion_rcs_cmd_buffer != NULL) { + needs_companion_sync = true; + break; + } + } + + result = + device->kmd_backend->queue_exec_locked( + queue, + wait_count, waits, + cmd_buffer_count, cmd_buffers, + needs_companion_sync ? 0 : signal_count, signals, + perf_query_pool, + perf_query_pass); + if (result != VK_SUCCESS) + return result; + + if (needs_companion_sync) { + struct vk_sync_wait companion_sync = { + .sync = queue->companion_sync, + }; + /* If any of the command buffers had a companion batch, the submission + * backend will signal queue->companion_sync, so to ensure completion, + * we just need to wait on that fence. 
+ */ + result = + device->kmd_backend->queue_exec_locked(queue, + 1, &companion_sync, + 0, NULL, + signal_count, signals, + NULL, 0); + } + + return result; } static inline bool diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 21718b0f2ee..2d6acce6361 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1081,6 +1081,29 @@ struct anv_queue { /** Synchronization object for debug purposes (DEBUG_SYNC) */ struct vk_sync *sync; + /** Companion synchronization object + * + * Vulkan command buffers can be destroyed as soon as their lifecycle moves + * from the Pending state to the Invalid/Executable state. This transition + * happens when the VkFence/VkSemaphore associated with the completion of + * the command buffer work is signaled. + * + * When we're using a companion command buffer to execute part of another + * command buffer, we need to tie the 2 work submissions together to ensure + * that when the associated VkFence/VkSemaphore is signaled, both command + * buffers are actually unused by the HW. To do this, we run an empty batch + * buffer that we use to signal after both submissions : + * + * CCS --> main ---> empty_batch (with wait on companion) --> signal + * RCS --> companion -| + * + * When companion batch completes, it signals companion_sync and allows + * empty_batch to execute. Since empty_batch is running on the main engine, + * we're guaranteed that upon completion both main & companion command + * buffers are not used by HW anymore. 
+ */ + struct vk_sync *companion_sync; + struct intel_ds_queue ds; }; diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index 9178145894a..1989016f6b2 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -68,6 +68,9 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue, uint32_t index_in_family) { struct anv_physical_device *pdevice = device->physical; + assert(queue->vk.queue_family_index < pdevice->queue.family_count); + struct anv_queue_family *queue_family = + &device->physical->queue.families[pCreateInfo->queueFamilyIndex]; VkResult result; result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, @@ -75,6 +78,11 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue, if (result != VK_SUCCESS) return result; + queue->vk.driver_submit = anv_queue_submit; + queue->device = device; + queue->family = queue_family; + queue->decoder = &device->decoder[queue->vk.queue_family_index]; + result = anv_create_engine(device, queue, pCreateInfo); if (result != VK_SUCCESS) { vk_queue_finish(&queue->vk); @@ -91,14 +99,16 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue, } } - queue->vk.driver_submit = anv_queue_submit; - - queue->device = device; - - assert(queue->vk.queue_family_index < pdevice->queue.family_count); - queue->family = &pdevice->queue.families[queue->vk.queue_family_index]; - - queue->decoder = &device->decoder[queue->vk.queue_family_index]; + if (queue_family->engine_class == INTEL_ENGINE_CLASS_COPY || + queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { + result = vk_sync_create(&device->vk, + &device->physical->sync_syncobj_type, + 0, 0, &queue->companion_sync); + if (result != VK_SUCCESS) { + anv_queue_finish(queue); + return result; + } + } return VK_SUCCESS; } @@ -109,6 +119,9 @@ anv_queue_finish(struct anv_queue *queue) if (queue->sync) vk_sync_destroy(&queue->device->vk, queue->sync); + if (queue->companion_sync) + 
vk_sync_destroy(&queue->device->vk, queue->companion_sync); + anv_destroy_engine(queue); vk_queue_finish(&queue->vk); } diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index 3ded767a631..c95efb2d173 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -677,6 +677,14 @@ i915_companion_rcs_queue_exec_locked(struct anv_queue *queue, goto error; } + if (queue->companion_sync) { + result = anv_execbuf_add_sync(device, &execbuf, + queue->companion_sync, + true /* is_signal */, 0); + if (result != VK_SUCCESS) + goto error; + } + result = setup_execbuf_for_cmd_buffers(&execbuf, queue, cmd_buffers, cmd_buffer_count, true /* is_companion_rcs_cmd_buffer */); diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c index 5fadf847e19..76a8a4fc2ec 100644 --- a/src/intel/vulkan/xe/anv_batch_chain.c +++ b/src/intel/vulkan/xe/anv_batch_chain.c @@ -216,10 +216,16 @@ xe_companion_rcs_queue_exec_locked(struct anv_queue *queue, struct anv_device *device = queue->device; VkResult result; + struct vk_sync_signal companion_sync = { + .sync = queue->companion_sync, + }; struct drm_xe_sync *xe_syncs = NULL; uint32_t xe_syncs_count = 0; - result = xe_exec_process_syncs(queue, wait_count, waits, 0, NULL, NULL, - true, /* is_companion_rcs_queue */ + result = xe_exec_process_syncs(queue, + wait_count, waits, + 1, &companion_sync, + NULL /* utrace_submit */, + true /* is_companion_rcs_queue */, &xe_syncs, &xe_syncs_count); if (result != VK_SUCCESS)