diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 43ee4766347..9805ccb13a6 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -628,10 +628,110 @@ process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
    return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS;
 }
 
+static void
+multisync_free(struct v3dv_device *device,
+               struct drm_v3d_sem *out_syncs,
+               struct drm_v3d_sem *in_syncs)
+{
+   vk_free(&device->vk.alloc, out_syncs);
+   vk_free(&device->vk.alloc, in_syncs);
+}
+
+static struct drm_v3d_sem *
+set_syncs(struct v3dv_device *device,
+          uint32_t *count, VkSemaphore *sems,
+          uint32_t last_job_sync)
+{
+   uint32_t n_sem = *count;
+
+   if (last_job_sync)
+      (*count)++;
+
+   if (!*count)
+      return NULL;
+
+   struct drm_v3d_sem *syncs =
+      vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem),
+                8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+
+   if (!syncs)
+      return NULL;
+
+   if (n_sem)
+      for (unsigned i = 0; i < n_sem; i++) {
+         struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);
+         syncs[i].handle = sem->sync;
+      }
+
+   if (last_job_sync)
+      syncs[n_sem].handle = last_job_sync;
+
+   return syncs;
+}
+
+static void
+set_ext(struct drm_v3d_extension *ext,
+	struct drm_v3d_extension *next,
+	uint32_t id,
+	uintptr_t flags)
+{
+   ext->next = (uintptr_t)(void *)next;
+   ext->id = id;
+   ext->flags = flags;
+}
+
+/* This function sets the extension for multiple in/out syncobjs. When it is
+ * successful, it sets the extension id to DRM_V3D_EXT_ID_MULTI_SYNC.
+ * Otherwise, the extension id is 0, which means an out-of-memory error.
+ */
+static void
+set_multisync(struct drm_v3d_multi_sync *ms,
+              struct v3dv_submit_info_semaphores *sems_info,
+              struct drm_v3d_extension *next,
+              struct v3dv_device *device,
+              struct drm_v3d_sem *out_syncs,
+              struct drm_v3d_sem *in_syncs,
+              bool serialize,
+              enum v3d_queue queue)
+{
+   uint32_t out_sync_count = 0, in_sync_count = 0;
+
+   /* pSignalSemaphores are handled in the master thread (the reason for
+    * out_sync_count being initially zero), but we still want to always signal
+    * last_job_sync so we can serialize jobs when needed.
+    */
+   out_sync_count = 0;
+   out_syncs = set_syncs(device, &out_sync_count, NULL, device->last_job_sync);
+
+   assert(out_sync_count > 0);
+
+   if (!out_syncs)
+      return;
+
+   /* If we are serializing a job in a command buffer, we are already making
+    * it wait for completion of the last job submitted, so in that case we can
+    * skip waiting for any additional semaphores.
+    */
+   in_sync_count = serialize ? 0 : sems_info->sem_count;
+   in_syncs = set_syncs(device, &in_sync_count, sems_info->sems,
+                        (serialize ? device->last_job_sync : 0));
+   if (!in_syncs && in_sync_count) {
+      vk_free(&device->vk.alloc, out_syncs);
+      return;
+   }
+
+   set_ext(&ms->base, next, DRM_V3D_EXT_ID_MULTI_SYNC, 0);
+   ms->wait_stage = queue;
+   ms->out_sync_count = out_sync_count;
+   ms->out_syncs = (uintptr_t)(void *)out_syncs;
+   ms->in_sync_count = in_sync_count;
+   ms->in_syncs = (uintptr_t)(void *)in_syncs;
+}
+
 static VkResult
 handle_cl_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
-              bool do_sem_wait)
+              struct v3dv_submit_info_semaphores *sems_info)
 {
    struct v3dv_device *device = queue->device;
 
@@ -673,8 +773,8 @@ handle_cl_job(struct v3dv_queue *queue,
    assert(bo_idx == submit.bo_handle_count);
    submit.bo_handles = (uintptr_t)(void *)bo_handles;
 
-   /* We need a binning sync if we are waiting on a sempahore (do_sem_wait) or
-    * if the job comes after a pipeline barrier than involves geometry stages
+   /* We need a binning sync if we are waiting on a semaphore or if the job
+    * comes after a pipeline barrier that involves geometry stages
     * (needs_bcl_sync).
     *
     * We need a render sync if the job doesn't need a binning sync but has
@@ -683,20 +783,39 @@ handle_cl_job(struct v3dv_queue *queue,
     * need to add a fence for those, however, we might need to wait on a CSD or
     * TFU job, which are not automatically serialized with CL jobs.
     *
-    * FIXME: for now, if we are asked to wait on any semaphores, we just wait
-    * on the last job we submitted. In the future we might want to pass the
-    * actual syncobj of the wait semaphores so we don't block on the last RCL
-    * if we only need to wait for a previous CSD or TFU, for example, but
-    * we would have to extend our kernel interface to support the case where
-    * we have more than one semaphore to wait on.
+    * FIXME: see if we can do better and avoid bcl syncs for any jobs in the
+    * command buffer after the first job where we should be able to track bcl
+    * dependencies strictly through barriers.
     */
-   const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync;
+   const bool needs_bcl_sync =
+      sems_info->sem_count > 0 || job->needs_bcl_sync;
    const bool needs_rcl_sync = job->serialize && !needs_bcl_sync;
+   struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL;
 
    mtx_lock(&queue->device->mutex);
-   submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
-   submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
-   submit.out_sync = device->last_job_sync;
+
+   /* Replace single semaphore settings whenever our kernel-driver supports
+    * multiple semaphores extension.
+    */
+   if (device->pdevice->caps.multisync) {
+      struct drm_v3d_multi_sync ms = { 0 };
+      set_multisync(&ms, sems_info, NULL, device, out_syncs, in_syncs,
+                    job->serialize, needs_rcl_sync ? V3D_RENDER : V3D_BIN);
+      if (!ms.base.id)
+         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit.flags |= DRM_V3D_SUBMIT_EXTENSION;
+      submit.extensions = (uintptr_t)(void *)&ms;
+      /* Disable legacy sync interface when multisync extension is used */
+      submit.in_sync_rcl = 0;
+      submit.in_sync_bcl = 0;
+      submit.out_sync = 0;
+   } else {
+      submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
+      submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
+      submit.out_sync = device->last_job_sync;
+   }
+
    v3dv_clif_dump(device, job, &submit);
    int ret = v3dv_ioctl(device->pdevice->render_fd,
                         DRM_IOCTL_V3D_SUBMIT_CL, &submit);
@@ -710,6 +829,8 @@ handle_cl_job(struct v3dv_queue *queue,
    }
 
    free(bo_handles);
+   if (device->pdevice->caps.multisync)
+      multisync_free(device, out_syncs, in_syncs);
 
    if (ret)
       return vk_error(device, VK_ERROR_DEVICE_LOST);
@@ -796,7 +917,7 @@ queue_submit_job(struct v3dv_queue *queue,
    bool do_sem_wait = wait_sems_info->sem_count > 0;
    switch (job->type) {
    case V3DV_JOB_TYPE_GPU_CL:
-      return handle_cl_job(queue, job, do_sem_wait);
+      return handle_cl_job(queue, job, wait_sems_info);
    case V3DV_JOB_TYPE_GPU_TFU:
       return handle_tfu_job(queue, job, do_sem_wait);
    case V3DV_JOB_TYPE_GPU_CSD: