v3dv: Emulate multi-queue support via vk_queue for Android

Android 14+ relies on at least 2 queues for Vulkan Skia/UI rendering.
More details are explained [here][1].

[1]: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/11326

Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41213>
2026-05-26 10:18:12 +02:00 · 2026-04-29 19:02:11 +03:00 · 2026-04-29 19:02:11 +03:00 · 60fdab22a5
commit 60fdab22a5
parent 16526e451e
4 changed files with 100 additions and 49 deletions
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@ -1684,13 +1684,13 @@ v3dv_physical_device_device_id(const struct v3dv_physical_device *dev)
   }
 }

-/* We support exactly one queue family. */
+/* We still expose a single queue family, but it now advertises
+ * V3DV_MAX_QUEUES queues (multi-queue emulation).
+ */
 static const VkQueueFamilyProperties
 v3dv_queue_family_properties = {
   .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_COMPUTE_BIT |
                 VK_QUEUE_TRANSFER_BIT,
-   .queueCount = 1,
+   .queueCount = V3DV_MAX_QUEUES,
   .timestampValidBits = 64,
   .minImageTransferGranularity = { 1, 1, 1 },
 };
@ -1909,14 +1909,16 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

-   /* Check requested queues (we only expose one queue ) */
-   assert(pCreateInfo->queueCreateInfoCount == 1);
+   /* Check requested queues */
+   uint32_t total_queues = 0;
   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      assert(pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex == 0);
-      assert(pCreateInfo->pQueueCreateInfos[i].queueCount == 1);
+      assert(pCreateInfo->pQueueCreateInfos[i].queueCount <= V3DV_MAX_QUEUES);
      if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
         return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
+      total_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
   }
+   assert(total_queues <= V3DV_MAX_QUEUES);

   device = vk_zalloc2(&physical_device->vk.instance->alloc, pAllocator,
                       sizeof(*device), 8,
@ -1939,6 +1941,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
   device->instance = instance;
   device->pdevice = physical_device;

+   mtx_init(&device->queue_mutex, mtx_plain);
   mtx_init(&device->query_mutex, mtx_plain);
   cnd_init(&device->query_ended);

@ -1948,10 +1951,25 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
   vk_device_set_drm_fd(&device->vk, physical_device->render_fd);
   vk_device_enable_threaded_submit(&device->vk);

-   result = queue_init(device, &device->queue,
-                       pCreateInfo->pQueueCreateInfos, 0);
-   if (result != VK_SUCCESS)
-      goto fail;
+   device->queues = vk_zalloc2(&device->vk.alloc, pAllocator,
+                               sizeof(*device->queues) * total_queues, 8,
+                               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!device->queues) {
+      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail_queues_alloc;
+   }
+
+   device->queue_count = 0;
+   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+      for (uint32_t j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) {
+         result = queue_init(device, &device->queues[device->queue_count],
+                             &pCreateInfo->pQueueCreateInfos[i], j);
+         if (result != VK_SUCCESS)
+            goto fail;
+
+         device->queue_count++;
+      }
+   }

   device->devinfo = physical_device->devinfo;

@ -2000,9 +2018,13 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
   return VK_SUCCESS;

 fail:
+   for (uint32_t i = 0; i < device->queue_count; i++)
+      queue_finish(&device->queues[i]);
+   vk_free2(&device->vk.alloc, pAllocator, device->queues);
+fail_queues_alloc:
   cnd_destroy(&device->query_ended);
   mtx_destroy(&device->query_mutex);
-   queue_finish(&device->queue);
+   mtx_destroy(&device->queue_mutex);
   if (device->noop_job)
      v3dv_job_destroy(device->noop_job);
   destroy_device_meta(device);
@ -2022,7 +2044,9 @@ v3dv_DestroyDevice(VkDevice _device,
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   device->vk.dispatch_table.DeviceWaitIdle(_device);
-   queue_finish(&device->queue);
+   for (uint32_t i = 0; i < device->queue_count; i++)
+      queue_finish(&device->queues[i]);
+   vk_free2(&device->vk.alloc, pAllocator, device->queues);

   if (device->noop_job)
      v3dv_job_destroy(device->noop_job);
@ -2049,6 +2073,7 @@ v3dv_DestroyDevice(VkDevice _device,

   cnd_destroy(&device->query_ended);
   mtx_destroy(&device->query_mutex);
+   mtx_destroy(&device->queue_mutex);

   vk_device_finish(&device->vk);
   vk_free2(&device->vk.alloc, pAllocator, device);
@ -2258,8 +2283,11 @@ free_memory(struct v3dv_device *device,
   if (mem->bo->map)
      device_unmap(device, mem);

-   if (mem->is_for_device_address)
+   if (mem->is_for_device_address) {
+      mtx_lock(&device->queue_mutex);
      device_remove_device_address_bo(device, mem->bo);
+      mtx_unlock(&device->queue_mutex);
+   }

   device_free(device, mem);

--- a/src/broadcom/vulkan/v3dv_device.h
+++ b/src/broadcom/vulkan/v3dv_device.h
@ -204,11 +204,6 @@ struct v3dv_queue {
   struct v3dv_device *device;

   struct v3dv_last_job_sync last_job_syncs;
-
-   /* The last active perfmon ID to prevent mixing of counter results when a
-    * job is submitted with a different perfmon id.
-    */
-   uint32_t last_perfmon_id;
 };

 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
@ -253,10 +248,21 @@ struct v3dv_device {
   struct v3dv_physical_device *pdevice;

   struct v3d_device_info devinfo;
-   struct v3dv_queue queue;
+   struct v3dv_queue *queues;
+   uint32_t queue_count;
+
+   /* Protects device state that is shared by all queues (such as
+    * device_address_bo_list and last_perfmon_id) against concurrent access
+    * when more than one queue is instantiated (Android).
+    */
+   mtx_t queue_mutex;

   struct v3dv_job *noop_job;

+   /* The last active perfmon ID to prevent mixing of counter results when a
+    * job is submitted with a different perfmon id.
+    */
+   uint32_t last_perfmon_id;
+
   /* Guards query->maybe_available and value for timestamps */
   mtx_t query_mutex;

--- a/src/broadcom/vulkan/v3dv_limits.h
+++ b/src/broadcom/vulkan/v3dv_limits.h
@ -24,6 +24,14 @@
 #define V3DV_LIMITS_H

 #include "drm-uapi/v3d_drm.h"
+#include "util/detect_os.h"
+
+#if DETECT_OS_ANDROID
+#define V3DV_MAX_QUEUES 4 /* Android: advertise several (emulated) queues */
+#else
+#define V3DV_MAX_QUEUES 1 /* Elsewhere: keep exposing a single queue */
+#endif
+

 /* From vulkan spec "If the multiple viewports feature is not enabled,
 * scissorCount must be 1", ditto for viewportCount. For now we don't support
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@ -258,14 +258,14 @@ set_multisync(struct drm_v3d_multi_sync *ms,
              struct vk_sync_wait *waits,
              unsigned wait_count,
              struct drm_v3d_extension *next,
-              struct v3dv_device *device,
+              struct v3dv_queue *queue,
              struct v3dv_job *job,
              enum v3dv_queue_type in_queue_sync,
              enum v3dv_queue_type out_queue_sync,
              enum v3d_queue wait_stage,
              bool signal_syncs)
 {
-   struct v3dv_queue *queue = &device->queue;
+   struct v3dv_device *device = queue->device;
   uint32_t out_sync_count = 0, in_sync_count = 0;
   struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL;

@ -339,7 +339,7 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue,

         reset.syncs = (uintptr_t)(void *)syncs;

-         set_multisync(&ms, sync_info, NULL, 0, (void *)&reset, device, job,
+         set_multisync(&ms, sync_info, NULL, 0, (void *)&reset, queue, job,
                       V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
         if (!ms.base.id) {
            free(syncs);
@ -381,7 +381,7 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue,
         reset.syncs = (uintptr_t)(void *)syncs;
         reset.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids;

-         set_multisync(&ms, sync_info, waits, wait_count, (void *)&reset, device, job,
+         set_multisync(&ms, sync_info, waits, wait_count, (void *)&reset, queue, job,
                       V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
         if (!ms.base.id) {
            free(syncs);
@ -481,7 +481,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int

      if (err) {
         close(*fd);
-         return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
+         return vk_errorf(queue, VK_ERROR_UNKNOWN,
                          "sync file export failed: %m");
      }

@ -490,7 +490,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
      if (err) {
         close(tmp_fd);
         close(*fd);
-         return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
+         return vk_errorf(queue, VK_ERROR_UNKNOWN,
                          "failed to accumulate sync files: %m");
      }
   }
@ -499,7 +499,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
 }

 static VkResult
-handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
+handle_end_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, uint32_t counter_pass_idx)
 {
   MESA_TRACE_FUNC();
   VkResult result = VK_SUCCESS;
@ -507,7 +507,6 @@ handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
   mtx_lock(&job->device->query_mutex);

   struct v3dv_end_query_info *info = &job->cpu.query_end;
-   struct v3dv_queue *queue = &job->device->queue;

   int err = 0;
   int fd = -1;
@ -611,7 +610,7 @@ handle_copy_query_results_cpu_job(struct v3dv_queue *queue,
         copy.offsets = (uintptr_t)(void *)offsets;
         copy.syncs = (uintptr_t)(void *)syncs;

-         set_multisync(&ms, sync_info, NULL, 0, (void *)&copy, device, job,
+         set_multisync(&ms, sync_info, NULL, 0, (void *)&copy, queue, job,
                       V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
         if (!ms.base.id) {
            free(bo_handles);
@ -668,7 +667,7 @@ handle_copy_query_results_cpu_job(struct v3dv_queue *queue,
         copy.syncs = (uintptr_t)(void *)syncs;
         copy.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids;

-         set_multisync(&ms, sync_info, waits, wait_count, (void *)&copy, device, job,
+         set_multisync(&ms, sync_info, waits, wait_count, (void *)&copy, queue, job,
                       V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
         if (!ms.base.id) {
            free(kperfmon_ids);
@ -796,7 +795,7 @@ handle_timestamp_query_cpu_job(struct v3dv_queue *queue,
    */
   job->serialize = V3DV_BARRIER_ALL;

-   set_multisync(&ms, sync_info, NULL, 0, (void *)&timestamp, device, job,
+   set_multisync(&ms, sync_info, NULL, 0, (void *)&timestamp, queue, job,
 	         V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
   if (!ms.base.id) {
      free(offsets);
@ -901,7 +900,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
    * CSD job, as the CPU job must obey to the CSD job synchronization
    * demands, such as barriers.
    */
-   set_multisync(&ms, sync_info, NULL, 0, (void *)&indirect, device, csd_job,
+   set_multisync(&ms, sync_info, NULL, 0, (void *)&indirect, queue, csd_job,
 	         V3DV_QUEUE_CPU, V3DV_QUEUE_CSD, V3D_CPU, signal_syncs);
   if (!ms.base.id)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -924,6 +923,22 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
   return VK_SUCCESS;
 }

+static inline void
+job_add_device_address_bos(struct v3dv_job *job, struct v3dv_queue *queue)
+{ /* Adds every BO in the device's device_address_bo_list to the job. */
+   if (!job->uses_buffer_device_address)
+      return; /* Job doesn't use buffer device addresses: nothing to add. */
+
+   struct v3dv_device *device = queue->device;
+
+   mtx_lock(&device->queue_mutex); /* Held while walking the device-wide list. */
+   util_dynarray_foreach(&device->device_address_bo_list,
+                         struct v3dv_bo *, bo) {
+      v3dv_job_add_bo(job, *bo);
+   }
+   mtx_unlock(&device->queue_mutex);
+}
+
 static VkResult
 handle_cl_job(struct v3dv_queue *queue,
              struct v3dv_job *job,
@ -966,12 +981,7 @@ handle_cl_job(struct v3dv_queue *queue,
    * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
    * are included.
    */
-   if (job->uses_buffer_device_address) {
-      util_dynarray_foreach(&queue->device->device_address_bo_list,
-                            struct v3dv_bo *, bo) {
-         v3dv_job_add_bo(job, *bo);
-      }
-   }
+   job_add_device_address_bos(job, queue);

   submit.bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
@ -986,8 +996,10 @@ handle_cl_job(struct v3dv_queue *queue,

   submit.perfmon_id = job->perf ?
      job->perf->kperfmon_ids[counter_pass_idx] : 0;
-   const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id;
-   queue->last_perfmon_id = submit.perfmon_id;
+   mtx_lock(&device->queue_mutex);
+   const bool needs_perf_sync = device->last_perfmon_id != submit.perfmon_id;
+   device->last_perfmon_id = submit.perfmon_id;
+   mtx_unlock(&device->queue_mutex);

   /* We need a binning sync if we are the first CL job waiting on a semaphore
    * with a wait stage that involves the geometry pipeline, or if the job
@ -1026,7 +1038,7 @@ handle_cl_job(struct v3dv_queue *queue,
    */
   struct drm_v3d_multi_sync ms = { 0 };
   enum v3d_queue wait_stage = needs_rcl_sync ? V3D_RENDER : V3D_BIN;
-   set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
+   set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
                 V3DV_QUEUE_CL, V3DV_QUEUE_CL, wait_stage, signal_syncs);
   if (!ms.base.id) {
      free(bo_handles);
@ -1078,7 +1090,7 @@ handle_tfu_job(struct v3dv_queue *queue,
    * multiple semaphore extension.
    */
   struct drm_v3d_multi_sync ms = { 0 };
-   set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
+   set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
                 V3DV_QUEUE_TFU, V3DV_QUEUE_TFU, V3D_TFU, signal_syncs);
   if (!ms.base.id)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -1118,12 +1130,7 @@ handle_csd_job(struct v3dv_queue *queue,
    * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
    * are included.
    */
-   if (job->uses_buffer_device_address) {
-      util_dynarray_foreach(&queue->device->device_address_bo_list,
-                            struct v3dv_bo *, bo) {
-         v3dv_job_add_bo(job, *bo);
-      }
-   }
+   job_add_device_address_bos(job, queue);

   submit->bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
@ -1140,7 +1147,7 @@ handle_csd_job(struct v3dv_queue *queue,
    * multiple semaphore extension.
    */
   struct drm_v3d_multi_sync ms = { 0 };
-   set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
+   set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
                 V3DV_QUEUE_CSD, V3DV_QUEUE_CSD, V3D_CSD, signal_syncs);
   if (!ms.base.id)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -1154,7 +1161,9 @@ handle_csd_job(struct v3dv_queue *queue,

   submit->perfmon_id = job->perf ?
      job->perf->kperfmon_ids[counter_pass_idx] : 0;
-   queue->last_perfmon_id = submit->perfmon_id;
+   mtx_lock(&device->queue_mutex);
+   device->last_perfmon_id = submit->perfmon_id;
+   mtx_unlock(&device->queue_mutex);

   int ret = v3d_ioctl(device->pdevice->render_fd,
                       DRM_IOCTL_V3D_SUBMIT_CSD, submit);
@ -1220,7 +1229,7 @@ queue_handle_job(struct v3dv_queue *queue,
   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
      return handle_reset_query_cpu_job(queue, job, sync_info, signal_syncs);
   case V3DV_JOB_TYPE_CPU_END_QUERY:
-      return handle_end_query_cpu_job(job, counter_pass_idx);
+      return handle_end_query_cpu_job(queue, job, counter_pass_idx);
   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
      return handle_copy_query_results_cpu_job(queue, job, sync_info, signal_syncs);
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: