v3dv: Emulate multi-queue support via vk_queue for Android
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Android 14+ relies on at least two queues for Vulkan Skia/UI rendering.
This is explained in more detail [here][1].

[1]: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/11326

Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41213>
This commit is contained in:
Roman Stratiienko 2026-04-29 19:02:11 +03:00 committed by Marge Bot
parent 16526e451e
commit 60fdab22a5
4 changed files with 100 additions and 49 deletions

View file

@ -1684,13 +1684,13 @@ v3dv_physical_device_device_id(const struct v3dv_physical_device *dev)
}
}
/* We support exactly one queue family. */
/* We expose a single queue family with up to V3DV_MAX_QUEUES emulated queues. */
static const VkQueueFamilyProperties
v3dv_queue_family_properties = {
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
VK_QUEUE_COMPUTE_BIT |
VK_QUEUE_TRANSFER_BIT,
.queueCount = 1,
.queueCount = V3DV_MAX_QUEUES,
.timestampValidBits = 64,
.minImageTransferGranularity = { 1, 1, 1 },
};
@ -1909,14 +1909,16 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
/* Check requested queues (we only expose one queue ) */
assert(pCreateInfo->queueCreateInfoCount == 1);
/* Check requested queues */
uint32_t total_queues = 0;
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
assert(pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex == 0);
assert(pCreateInfo->pQueueCreateInfos[i].queueCount == 1);
assert(pCreateInfo->pQueueCreateInfos[i].queueCount <= V3DV_MAX_QUEUES);
if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
total_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
}
assert(total_queues <= V3DV_MAX_QUEUES);
device = vk_zalloc2(&physical_device->vk.instance->alloc, pAllocator,
sizeof(*device), 8,
@ -1939,6 +1941,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
device->instance = instance;
device->pdevice = physical_device;
mtx_init(&device->queue_mutex, mtx_plain);
mtx_init(&device->query_mutex, mtx_plain);
cnd_init(&device->query_ended);
@ -1948,10 +1951,25 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
vk_device_set_drm_fd(&device->vk, physical_device->render_fd);
vk_device_enable_threaded_submit(&device->vk);
result = queue_init(device, &device->queue,
pCreateInfo->pQueueCreateInfos, 0);
if (result != VK_SUCCESS)
goto fail;
device->queues = vk_zalloc2(&device->vk.alloc, pAllocator,
sizeof(*device->queues) * total_queues, 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->queues) {
result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_queues_alloc;
}
device->queue_count = 0;
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
for (uint32_t j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) {
result = queue_init(device, &device->queues[device->queue_count],
&pCreateInfo->pQueueCreateInfos[i], j);
if (result != VK_SUCCESS)
goto fail;
device->queue_count++;
}
}
device->devinfo = physical_device->devinfo;
@ -2000,9 +2018,13 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
return VK_SUCCESS;
fail:
for (uint32_t i = 0; i < device->queue_count; i++)
queue_finish(&device->queues[i]);
vk_free2(&device->vk.alloc, pAllocator, device->queues);
fail_queues_alloc:
cnd_destroy(&device->query_ended);
mtx_destroy(&device->query_mutex);
queue_finish(&device->queue);
mtx_destroy(&device->queue_mutex);
if (device->noop_job)
v3dv_job_destroy(device->noop_job);
destroy_device_meta(device);
@ -2022,7 +2044,9 @@ v3dv_DestroyDevice(VkDevice _device,
V3DV_FROM_HANDLE(v3dv_device, device, _device);
device->vk.dispatch_table.DeviceWaitIdle(_device);
queue_finish(&device->queue);
for (uint32_t i = 0; i < device->queue_count; i++)
queue_finish(&device->queues[i]);
vk_free2(&device->vk.alloc, pAllocator, device->queues);
if (device->noop_job)
v3dv_job_destroy(device->noop_job);
@ -2049,6 +2073,7 @@ v3dv_DestroyDevice(VkDevice _device,
cnd_destroy(&device->query_ended);
mtx_destroy(&device->query_mutex);
mtx_destroy(&device->queue_mutex);
vk_device_finish(&device->vk);
vk_free2(&device->vk.alloc, pAllocator, device);
@ -2258,8 +2283,11 @@ free_memory(struct v3dv_device *device,
if (mem->bo->map)
device_unmap(device, mem);
if (mem->is_for_device_address)
if (mem->is_for_device_address) {
mtx_lock(&device->queue_mutex);
device_remove_device_address_bo(device, mem->bo);
mtx_unlock(&device->queue_mutex);
}
device_free(device, mem);

View file

@ -204,11 +204,6 @@ struct v3dv_queue {
struct v3dv_device *device;
struct v3dv_last_job_sync last_job_syncs;
/* The last active perfmon ID to prevent mixing of counter results when a
* job is submitted with a different perfmon id.
*/
uint32_t last_perfmon_id;
};
VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
@ -253,10 +248,21 @@ struct v3dv_device {
struct v3dv_physical_device *pdevice;
struct v3d_device_info devinfo;
struct v3dv_queue queue;
struct v3dv_queue *queues;
uint32_t queue_count;
/* In cases where we instantiate more than one queue (Android), this protects
* against concurrent access from multiple queues.
*/
mtx_t queue_mutex;
struct v3dv_job *noop_job;
/* The last active perfmon ID to prevent mixing of counter results when a
* job is submitted with a different perfmon id.
*/
uint32_t last_perfmon_id;
/* Guards query->maybe_available and value for timestamps */
mtx_t query_mutex;

View file

@ -24,6 +24,14 @@
#define V3DV_LIMITS_H
#include "drm-uapi/v3d_drm.h"
#include "util/detect_os.h"
/* Maximum number of queues the driver exposes (used as the queueCount of its
 * queue family). More than one queue is only offered on Android builds;
 * everywhere else a single queue is exposed.
 */
#if DETECT_OS_ANDROID
#define V3DV_MAX_QUEUES 4
#else
#define V3DV_MAX_QUEUES 1
#endif
/* From vulkan spec "If the multiple viewports feature is not enabled,
* scissorCount must be 1", ditto for viewportCount. For now we don't support

View file

@ -258,14 +258,14 @@ set_multisync(struct drm_v3d_multi_sync *ms,
struct vk_sync_wait *waits,
unsigned wait_count,
struct drm_v3d_extension *next,
struct v3dv_device *device,
struct v3dv_queue *queue,
struct v3dv_job *job,
enum v3dv_queue_type in_queue_sync,
enum v3dv_queue_type out_queue_sync,
enum v3d_queue wait_stage,
bool signal_syncs)
{
struct v3dv_queue *queue = &device->queue;
struct v3dv_device *device = queue->device;
uint32_t out_sync_count = 0, in_sync_count = 0;
struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL;
@ -339,7 +339,7 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue,
reset.syncs = (uintptr_t)(void *)syncs;
set_multisync(&ms, sync_info, NULL, 0, (void *)&reset, device, job,
set_multisync(&ms, sync_info, NULL, 0, (void *)&reset, queue, job,
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
if (!ms.base.id) {
free(syncs);
@ -381,7 +381,7 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue,
reset.syncs = (uintptr_t)(void *)syncs;
reset.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids;
set_multisync(&ms, sync_info, waits, wait_count, (void *)&reset, device, job,
set_multisync(&ms, sync_info, waits, wait_count, (void *)&reset, queue, job,
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
if (!ms.base.id) {
free(syncs);
@ -481,7 +481,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
if (err) {
close(*fd);
return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
return vk_errorf(queue, VK_ERROR_UNKNOWN,
"sync file export failed: %m");
}
@ -490,7 +490,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
if (err) {
close(tmp_fd);
close(*fd);
return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
return vk_errorf(queue, VK_ERROR_UNKNOWN,
"failed to accumulate sync files: %m");
}
}
@ -499,7 +499,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
}
static VkResult
handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
handle_end_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, uint32_t counter_pass_idx)
{
MESA_TRACE_FUNC();
VkResult result = VK_SUCCESS;
@ -507,7 +507,6 @@ handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
mtx_lock(&job->device->query_mutex);
struct v3dv_end_query_info *info = &job->cpu.query_end;
struct v3dv_queue *queue = &job->device->queue;
int err = 0;
int fd = -1;
@ -611,7 +610,7 @@ handle_copy_query_results_cpu_job(struct v3dv_queue *queue,
copy.offsets = (uintptr_t)(void *)offsets;
copy.syncs = (uintptr_t)(void *)syncs;
set_multisync(&ms, sync_info, NULL, 0, (void *)&copy, device, job,
set_multisync(&ms, sync_info, NULL, 0, (void *)&copy, queue, job,
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
if (!ms.base.id) {
free(bo_handles);
@ -668,7 +667,7 @@ handle_copy_query_results_cpu_job(struct v3dv_queue *queue,
copy.syncs = (uintptr_t)(void *)syncs;
copy.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids;
set_multisync(&ms, sync_info, waits, wait_count, (void *)&copy, device, job,
set_multisync(&ms, sync_info, waits, wait_count, (void *)&copy, queue, job,
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
if (!ms.base.id) {
free(kperfmon_ids);
@ -796,7 +795,7 @@ handle_timestamp_query_cpu_job(struct v3dv_queue *queue,
*/
job->serialize = V3DV_BARRIER_ALL;
set_multisync(&ms, sync_info, NULL, 0, (void *)&timestamp, device, job,
set_multisync(&ms, sync_info, NULL, 0, (void *)&timestamp, queue, job,
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
if (!ms.base.id) {
free(offsets);
@ -901,7 +900,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
* CSD job, as the CPU job must obey to the CSD job synchronization
* demands, such as barriers.
*/
set_multisync(&ms, sync_info, NULL, 0, (void *)&indirect, device, csd_job,
set_multisync(&ms, sync_info, NULL, 0, (void *)&indirect, queue, csd_job,
V3DV_QUEUE_CPU, V3DV_QUEUE_CSD, V3D_CPU, signal_syncs);
if (!ms.base.id)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -924,6 +923,22 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
return VK_SUCCESS;
}
/* Appends every BO in the device's device-address BO list to the given job.
 *
 * Nothing is done unless the job is flagged as using buffer device
 * addresses. The list is walked with device->queue_mutex held, the same
 * mutex free_memory() takes when removing a BO from that list.
 */
static inline void
job_add_device_address_bos(struct v3dv_job *job, struct v3dv_queue *queue)
{
   if (job->uses_buffer_device_address) {
      struct v3dv_device *dev = queue->device;

      mtx_lock(&dev->queue_mutex);
      util_dynarray_foreach(&dev->device_address_bo_list,
                            struct v3dv_bo *, bo_ptr) {
         v3dv_job_add_bo(job, *bo_ptr);
      }
      mtx_unlock(&dev->queue_mutex);
   }
}
static VkResult
handle_cl_job(struct v3dv_queue *queue,
struct v3dv_job *job,
@ -966,12 +981,7 @@ handle_cl_job(struct v3dv_queue *queue,
* buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
* are included.
*/
if (job->uses_buffer_device_address) {
util_dynarray_foreach(&queue->device->device_address_bo_list,
struct v3dv_bo *, bo) {
v3dv_job_add_bo(job, *bo);
}
}
job_add_device_address_bos(job, queue);
submit.bo_handle_count = job->bo_count;
uint32_t *bo_handles =
@ -986,8 +996,10 @@ handle_cl_job(struct v3dv_queue *queue,
submit.perfmon_id = job->perf ?
job->perf->kperfmon_ids[counter_pass_idx] : 0;
const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id;
queue->last_perfmon_id = submit.perfmon_id;
mtx_lock(&device->queue_mutex);
const bool needs_perf_sync = device->last_perfmon_id != submit.perfmon_id;
device->last_perfmon_id = submit.perfmon_id;
mtx_unlock(&device->queue_mutex);
/* We need a binning sync if we are the first CL job waiting on a semaphore
* with a wait stage that involves the geometry pipeline, or if the job
@ -1026,7 +1038,7 @@ handle_cl_job(struct v3dv_queue *queue,
*/
struct drm_v3d_multi_sync ms = { 0 };
enum v3d_queue wait_stage = needs_rcl_sync ? V3D_RENDER : V3D_BIN;
set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
V3DV_QUEUE_CL, V3DV_QUEUE_CL, wait_stage, signal_syncs);
if (!ms.base.id) {
free(bo_handles);
@ -1078,7 +1090,7 @@ handle_tfu_job(struct v3dv_queue *queue,
* multiple semaphore extension.
*/
struct drm_v3d_multi_sync ms = { 0 };
set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
V3DV_QUEUE_TFU, V3DV_QUEUE_TFU, V3D_TFU, signal_syncs);
if (!ms.base.id)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -1118,12 +1130,7 @@ handle_csd_job(struct v3dv_queue *queue,
* buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
* are included.
*/
if (job->uses_buffer_device_address) {
util_dynarray_foreach(&queue->device->device_address_bo_list,
struct v3dv_bo *, bo) {
v3dv_job_add_bo(job, *bo);
}
}
job_add_device_address_bos(job, queue);
submit->bo_handle_count = job->bo_count;
uint32_t *bo_handles =
@ -1140,7 +1147,7 @@ handle_csd_job(struct v3dv_queue *queue,
* multiple semaphore extension.
*/
struct drm_v3d_multi_sync ms = { 0 };
set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
V3DV_QUEUE_CSD, V3DV_QUEUE_CSD, V3D_CSD, signal_syncs);
if (!ms.base.id)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -1154,7 +1161,9 @@ handle_csd_job(struct v3dv_queue *queue,
submit->perfmon_id = job->perf ?
job->perf->kperfmon_ids[counter_pass_idx] : 0;
queue->last_perfmon_id = submit->perfmon_id;
mtx_lock(&device->queue_mutex);
device->last_perfmon_id = submit->perfmon_id;
mtx_unlock(&device->queue_mutex);
int ret = v3d_ioctl(device->pdevice->render_fd,
DRM_IOCTL_V3D_SUBMIT_CSD, submit);
@ -1220,7 +1229,7 @@ queue_handle_job(struct v3dv_queue *queue,
case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
return handle_reset_query_cpu_job(queue, job, sync_info, signal_syncs);
case V3DV_JOB_TYPE_CPU_END_QUERY:
return handle_end_query_cpu_job(job, counter_pass_idx);
return handle_end_query_cpu_job(queue, job, counter_pass_idx);
case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
return handle_copy_query_results_cpu_job(queue, job, sync_info, signal_syncs);
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: