diff --git a/src/virtio/vulkan/vn_command_buffer.c b/src/virtio/vulkan/vn_command_buffer.c index 35db167f125..3b225f0f855 100644 --- a/src/virtio/vulkan/vn_command_buffer.c +++ b/src/virtio/vulkan/vn_command_buffer.c @@ -716,6 +716,7 @@ vn_CreateCommandPool(VkDevice device, pool->queue_family_index = pCreateInfo->queueFamilyIndex; list_inithead(&pool->command_buffers); list_inithead(&pool->free_query_batches); + list_inithead(&pool->free_query_feedback_cmds); VkCommandPool pool_handle = vn_command_pool_to_handle(pool); vn_async_vkCreateCommandPool(dev->instance, device, pCreateInfo, NULL, @@ -726,6 +727,16 @@ vn_CreateCommandPool(VkDevice device, return VK_SUCCESS; } +static inline void +vn_recycle_query_feedback_cmd(struct vn_command_buffer *cmd) +{ + vn_ResetCommandBuffer( + vn_command_buffer_to_handle(cmd->linked_query_feedback_cmd), 0); + list_add(&cmd->linked_query_feedback_cmd->feedback_head, + &cmd->linked_query_feedback_cmd->pool->free_query_feedback_cmds); + cmd->linked_query_feedback_cmd = NULL; +} + void vn_DestroyCommandPool(VkDevice device, VkCommandPool commandPool, @@ -760,6 +771,9 @@ vn_DestroyCommandPool(VkDevice device, &cmd->builder.query_batches, head) vk_free(alloc, batch); + if (cmd->linked_query_feedback_cmd) + vn_recycle_query_feedback_cmd(cmd); + vk_free(alloc, cmd); } @@ -789,6 +803,9 @@ vn_cmd_reset(struct vn_command_buffer *cmd) &cmd->builder.query_batches, head) vn_cmd_query_batch_pop(cmd, batch); + if (cmd->linked_query_feedback_cmd) + vn_recycle_query_feedback_cmd(cmd); + memset(&cmd->builder, 0, sizeof(cmd->builder)); list_inithead(&cmd->builder.query_batches); @@ -906,6 +923,9 @@ vn_FreeCommandBuffers(VkDevice device, &cmd->builder.query_batches, head) vn_cmd_query_batch_pop(cmd, batch); + if (cmd->linked_query_feedback_cmd) + vn_recycle_query_feedback_cmd(cmd); + vn_object_base_fini(&cmd->base); vk_free(alloc, cmd); } @@ -1045,6 +1065,8 @@ vn_BeginCommandBuffer(VkCommandBuffer commandBuffer, cmd->state = 
VN_COMMAND_BUFFER_STATE_INVALID; return vn_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); } + cmd->builder.is_simultaneous = + pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; vn_encode_vkBeginCommandBuffer(&cmd->cs, 0, commandBuffer, pBeginInfo); diff --git a/src/virtio/vulkan/vn_command_buffer.h b/src/virtio/vulkan/vn_command_buffer.h index 292b0babc8b..73fc91d92d5 100644 --- a/src/virtio/vulkan/vn_command_buffer.h +++ b/src/virtio/vulkan/vn_command_buffer.h @@ -25,6 +25,7 @@ struct vn_command_pool { struct list_head command_buffers; struct list_head free_query_batches; + struct list_head free_query_feedback_cmds; /* Temporary storage for scrubbing VK_IMAGE_LAYOUT_PRESENT_SRC_KHR. The * storage's lifetime is the command pool's lifetime. We increase the @@ -66,6 +67,8 @@ struct vn_command_buffer_builder { uint32_t subpass_index; /* track the active view mask inside a render pass instance */ uint32_t view_mask; + /* track if VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT was set */ + bool is_simultaneous; /* track the query feedbacks deferred outside the render pass instance */ struct list_head query_batches; }; @@ -82,7 +85,11 @@ struct vn_command_buffer { struct vn_command_buffer_builder builder; + struct vn_command_buffer *linked_query_feedback_cmd; + struct list_head head; + + struct list_head feedback_head; }; VK_DEFINE_HANDLE_CASTS(vn_command_buffer, base.base, diff --git a/src/virtio/vulkan/vn_feedback.c b/src/virtio/vulkan/vn_feedback.c index 5adb55e4d78..c17d947e958 100644 --- a/src/virtio/vulkan/vn_feedback.c +++ b/src/virtio/vulkan/vn_feedback.c @@ -620,15 +620,25 @@ vn_feedback_query_batch_record(VkDevice dev_handle, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; + struct vn_command_pool *cmd_pool = + vn_command_pool_from_handle(feedback_pool->pool); VkCommandBuffer feedback_cmd_handle; VkResult result; simple_mtx_lock(&feedback_pool->mutex); - result = - vn_AllocateCommandBuffers(dev_handle, &info, 
&feedback_cmd_handle); - if (result != VK_SUCCESS) - goto out_unlock; + if (!list_is_empty(&cmd_pool->free_query_feedback_cmds)) { + struct vn_command_buffer *free_cmd = + list_first_entry(&cmd_pool->free_query_feedback_cmds, + struct vn_command_buffer, feedback_head); + feedback_cmd_handle = vn_command_buffer_to_handle(free_cmd); + list_del(&free_cmd->feedback_head); + } else { + result = + vn_AllocateCommandBuffers(dev_handle, &info, &feedback_cmd_handle); + if (result != VK_SUCCESS) + goto out_unlock; + } static const VkCommandBufferBeginInfo begin_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c index 62d9980c011..a3169275a85 100644 --- a/src/virtio/vulkan/vn_queue.c +++ b/src/virtio/vulkan/vn_queue.c @@ -17,6 +17,7 @@ #include "venus-protocol/vn_protocol_driver_semaphore.h" #include "venus-protocol/vn_protocol_driver_transport.h" +#include "vn_command_buffer.h" #include "vn_device.h" #include "vn_device_memory.h" #include "vn_physical_device.h" @@ -45,6 +46,7 @@ struct vn_queue_submission { const struct vn_device_memory *wsi_mem; uint32_t feedback_cmd_buffer_count; struct vn_sync_payload_external external_payload; + struct vn_command_buffer *recycle_query_feedback_cmd; /* Temporary storage allocation for submission * A single alloc for storage is performed and the offsets inside @@ -489,6 +491,79 @@ vn_get_feedback_cmd_handle(struct vn_queue_submission *submit, : &feedback_cmds->cmd_buffer_infos[cmd_index].commandBuffer; } +static VkResult +vn_queue_submission_add_query_feedback(struct vn_queue_submission *submit, + uint32_t cmd_buffer_count, + struct vn_feedback_cmds *feedback_cmds) +{ + struct vk_queue *queue_vk = vk_queue_from_handle(submit->queue_handle); + VkDevice dev_handle = vk_device_to_handle(queue_vk->base.device); + struct vn_device *dev = vn_device_from_handle(dev_handle); + VkCommandBuffer *src_cmd_handles = + vn_get_feedback_cmd_handle(submit, feedback_cmds, 
0); + VkCommandBuffer *feedback_cmd_handle = + vn_get_feedback_cmd_handle(submit, feedback_cmds, cmd_buffer_count); + uint32_t stride = (submit->batch_type == VK_STRUCTURE_TYPE_SUBMIT_INFO) + ? sizeof(VkCommandBuffer *) + : sizeof(VkCommandBufferSubmitInfo); + VkResult result; + + uint32_t pool_index; + for (pool_index = 0; pool_index < dev->queue_family_count; pool_index++) { + if (dev->queue_families[pool_index] == queue_vk->queue_family_index) + break; + } + + result = vn_feedback_query_batch_record( + dev_handle, &dev->cmd_pools[pool_index], src_cmd_handles, + cmd_buffer_count, stride, feedback_cmd_handle); + if (result != VK_SUCCESS) + return result; + + /* link query feedback cmd lifecycle with a cmd in the original batch so + * that the feedback cmd can be reset and recycled when that cmd gets + * reset/freed. + * + * Avoid cmd buffers with VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT + * since we don't know if all its instances have completed execution. + * Should be rare enough to just log and leak the feedback cmd. + */ + struct vn_command_buffer *linked_cmd_buffer = NULL; + for (int32_t i = (int32_t)cmd_buffer_count - 1; i >= 0; i--) { + VkCommandBuffer *cmd_handle = + vn_get_feedback_cmd_handle(submit, feedback_cmds, i); + struct vn_command_buffer *cmd_buffer = + vn_command_buffer_from_handle(*cmd_handle); + + if (!cmd_buffer->builder.is_simultaneous) { + linked_cmd_buffer = cmd_buffer; + break; + } + } + + if (!linked_cmd_buffer) { + vn_log(dev->instance, + "Could not find non simultaneous cmd to link query feedback\n"); + return VK_SUCCESS; + } + + /* If a cmd that was submitted previously and already has a feedback cmd + * linked, as long as VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT is not + * set we can assume it has completed execution and is no longer in the + * pending state so it's safe to recycle the old feedback command before + * linking a new one. Defer the actual recycle operation to + * vn_queue_submission_cleanup. 
+ */ + if (linked_cmd_buffer->linked_query_feedback_cmd) + submit->recycle_query_feedback_cmd = + linked_cmd_buffer->linked_query_feedback_cmd; + + linked_cmd_buffer->linked_query_feedback_cmd = + vn_command_buffer_from_handle(*feedback_cmd_handle); + + return VK_SUCCESS; +} + static VkResult vn_queue_submission_add_sem_feedback(struct vn_queue_submission *submit, uint32_t batch_index, @@ -789,6 +864,14 @@ vn_queue_submission_cleanup(struct vn_queue_submission *submit) struct vn_queue *queue = vn_queue_from_handle(submit->queue_handle); const VkAllocationCallbacks *alloc = &queue->base.base.base.device->alloc; + if (submit->recycle_query_feedback_cmd) { + vn_ResetCommandBuffer( + vn_command_buffer_to_handle(submit->recycle_query_feedback_cmd), 0); + list_add( + &submit->recycle_query_feedback_cmd->feedback_head, + &submit->recycle_query_feedback_cmd->pool->free_query_feedback_cmds); + } + /* TODO clean up pending src feedbacks on failure? */ if (submit->has_feedback_semaphore) vn_queue_recycle_src_feedback(submit);