diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 17c05ec7032..b82735dc60c 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -81,6 +81,8 @@ typedef void *drmDevicePtr;
 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
 #endif
 
+static VkResult radv_queue_submit2(struct vk_queue *vqueue, struct vk_queue_submit *submission);
+
 static struct radv_timeline_point *
 radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
                                          uint64_t p);
@@ -2673,6 +2675,8 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue,
    if (result != VK_SUCCESS)
       return result;
 
+   queue->vk.driver_submit = radv_queue_submit2;
+
    list_inithead(&queue->pending_submissions);
    mtx_init(&queue->pending_mutex, mtx_plain);
 
@@ -4739,6 +4743,115 @@ radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submis
    u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
 }
 
+static VkResult
+radv_queue_submit2(struct vk_queue *vqueue, struct vk_queue_submit *submission)
+{
+   struct radv_queue *queue = (struct radv_queue *)vqueue;
+   struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+   uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
+   bool can_patch = true;
+   uint32_t advance;
+   VkResult result;
+   struct radeon_cmdbuf *initial_preamble_cs = NULL;
+   struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
+   struct radeon_cmdbuf *continue_preamble_cs = NULL;
+
+   result =
+      radv_get_preambles(queue, submission->command_buffers, submission->command_buffer_count,
+                         &initial_preamble_cs, &initial_flush_preamble_cs, &continue_preamble_cs);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
+      result = radv_sparse_buffer_bind_memory(queue->device, submission->buffer_binds + i);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
+   for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
+      result =
+         radv_sparse_image_opaque_bind_memory(queue->device, submission->image_opaque_binds + i);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
+   for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
+      result = radv_sparse_image_bind_memory(queue->device, submission->image_binds + i);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
+   if (!submission->command_buffer_count && !submission->wait_count && !submission->signal_count)
+      return VK_SUCCESS;
+
+   if (!submission->command_buffer_count) {
+      result = queue->device->ws->cs_submit2(ctx, queue->vk.queue_family_index,
+                                             queue->vk.index_in_family, NULL, 0, NULL, NULL,
+                                             submission->wait_count, submission->waits,
+                                             submission->signal_count, submission->signals, false);
+      if (result != VK_SUCCESS)
+         goto fail;
+   } else {
+      struct radeon_cmdbuf **cs_array =
+         malloc(sizeof(struct radeon_cmdbuf *) * (submission->command_buffer_count));
+
+      for (uint32_t j = 0; j < submission->command_buffer_count; j++) {
+         struct radv_cmd_buffer *cmd_buffer =
+            (struct radv_cmd_buffer *)submission->command_buffers[j];
+         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+         cs_array[j] = cmd_buffer->cs;
+         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
+            can_patch = false;
+
+         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
+      }
+
+      for (uint32_t j = 0; j < submission->command_buffer_count; j += advance) {
+         struct radeon_cmdbuf *initial_preamble =
+            !j ? initial_flush_preamble_cs : initial_preamble_cs;
+         advance = MIN2(max_cs_submission, submission->command_buffer_count - j);
+         bool last_submit = j + advance == submission->command_buffer_count;
+
+         if (queue->device->trace_bo)
+            *queue->device->trace_id_ptr = 0;
+
+         result = queue->device->ws->cs_submit2(
+            ctx, queue->vk.queue_family_index, queue->vk.index_in_family, cs_array + j, advance,
+            initial_preamble, continue_preamble_cs, j == 0 ? submission->wait_count : 0,
+            submission->waits, last_submit ? submission->signal_count : 0, submission->signals,
+            can_patch);
+         if (result != VK_SUCCESS) {
+            free(cs_array);
+            goto fail;
+         }
+
+         if (queue->device->trace_bo) {
+            radv_check_gpu_hangs(queue, cs_array[j]);
+         }
+
+         if (queue->device->tma_bo) {
+            radv_check_trap_handler(queue);
+         }
+      }
+
+      free(cs_array);
+   }
+
+fail:
+   if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
+      /* When something bad happened during the submission, such as
+       * an out of memory issue, it might be hard to recover from
+       * this inconsistent state. To avoid this sort of problem, we
+       * assume that we are in a really bad situation and return
+       * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
+       * to submit the same job again to this device.
+       */
+      result = vk_device_set_lost(&queue->device->vk, "vkQueueSubmit() failed");
+   }
+   return result;
+}
+
 static VkResult
 radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
                            struct list_head *processing_list)
@@ -5036,17 +5149,10 @@ bool
 radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
 {
    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
-   struct radv_winsys_sem_info sem_info = {0};
-   VkResult result;
 
-   result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0, 0, NULL, VK_NULL_HANDLE);
-   if (result != VK_SUCCESS)
-      return false;
-
-   result =
-      queue->device->ws->cs_submit(ctx, queue->vk.queue_family_index, queue->vk.index_in_family,
-                                   &cs, 1, NULL, NULL, &sem_info, false);
-   radv_free_sem_info(&sem_info);
+   VkResult result =
+      queue->device->ws->cs_submit2(ctx, queue->vk.queue_family_index, queue->vk.index_in_family,
+                                    &cs, 1, NULL, NULL, 0, NULL, 0, NULL, false);
    if (result != VK_SUCCESS)
       return false;