mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-26 10:18:12 +02:00
v3dv: Emulate multi-queue support via vk_queue for Android
Android 14+ relies on at least two queues for Vulkan Skia/UI rendering. This is explained in more detail [here][1].

[1]: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/11326

Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41213>
This commit is contained in:
parent
16526e451e
commit
60fdab22a5
4 changed files with 100 additions and 49 deletions
|
|
@ -1684,13 +1684,13 @@ v3dv_physical_device_device_id(const struct v3dv_physical_device *dev)
|
|||
}
|
||||
}
|
||||
|
||||
/* We support exactly one queue family. */
|
||||
/* We support multiqueue emulation */
|
||||
static const VkQueueFamilyProperties
|
||||
v3dv_queue_family_properties = {
|
||||
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
|
||||
VK_QUEUE_COMPUTE_BIT |
|
||||
VK_QUEUE_TRANSFER_BIT,
|
||||
.queueCount = 1,
|
||||
.queueCount = V3DV_MAX_QUEUES,
|
||||
.timestampValidBits = 64,
|
||||
.minImageTransferGranularity = { 1, 1, 1 },
|
||||
};
|
||||
|
|
@ -1909,14 +1909,16 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
|
||||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
|
||||
|
||||
/* Check requested queues (we only expose one queue ) */
|
||||
assert(pCreateInfo->queueCreateInfoCount == 1);
|
||||
/* Check requested queues */
|
||||
uint32_t total_queues = 0;
|
||||
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
|
||||
assert(pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex == 0);
|
||||
assert(pCreateInfo->pQueueCreateInfos[i].queueCount == 1);
|
||||
assert(pCreateInfo->pQueueCreateInfos[i].queueCount <= V3DV_MAX_QUEUES);
|
||||
if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
|
||||
return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
|
||||
total_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
|
||||
}
|
||||
assert(total_queues <= V3DV_MAX_QUEUES);
|
||||
|
||||
device = vk_zalloc2(&physical_device->vk.instance->alloc, pAllocator,
|
||||
sizeof(*device), 8,
|
||||
|
|
@ -1939,6 +1941,7 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
device->instance = instance;
|
||||
device->pdevice = physical_device;
|
||||
|
||||
mtx_init(&device->queue_mutex, mtx_plain);
|
||||
mtx_init(&device->query_mutex, mtx_plain);
|
||||
cnd_init(&device->query_ended);
|
||||
|
||||
|
|
@ -1948,10 +1951,25 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
vk_device_set_drm_fd(&device->vk, physical_device->render_fd);
|
||||
vk_device_enable_threaded_submit(&device->vk);
|
||||
|
||||
result = queue_init(device, &device->queue,
|
||||
pCreateInfo->pQueueCreateInfos, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
device->queues = vk_zalloc2(&device->vk.alloc, pAllocator,
|
||||
sizeof(*device->queues) * total_queues, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!device->queues) {
|
||||
result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail_queues_alloc;
|
||||
}
|
||||
|
||||
device->queue_count = 0;
|
||||
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
|
||||
for (uint32_t j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) {
|
||||
result = queue_init(device, &device->queues[device->queue_count],
|
||||
&pCreateInfo->pQueueCreateInfos[i], j);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
device->queue_count++;
|
||||
}
|
||||
}
|
||||
|
||||
device->devinfo = physical_device->devinfo;
|
||||
|
||||
|
|
@ -2000,9 +2018,13 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
return VK_SUCCESS;
|
||||
|
||||
fail:
|
||||
for (uint32_t i = 0; i < device->queue_count; i++)
|
||||
queue_finish(&device->queues[i]);
|
||||
vk_free2(&device->vk.alloc, pAllocator, device->queues);
|
||||
fail_queues_alloc:
|
||||
cnd_destroy(&device->query_ended);
|
||||
mtx_destroy(&device->query_mutex);
|
||||
queue_finish(&device->queue);
|
||||
mtx_destroy(&device->queue_mutex);
|
||||
if (device->noop_job)
|
||||
v3dv_job_destroy(device->noop_job);
|
||||
destroy_device_meta(device);
|
||||
|
|
@ -2022,7 +2044,9 @@ v3dv_DestroyDevice(VkDevice _device,
|
|||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
|
||||
device->vk.dispatch_table.DeviceWaitIdle(_device);
|
||||
queue_finish(&device->queue);
|
||||
for (uint32_t i = 0; i < device->queue_count; i++)
|
||||
queue_finish(&device->queues[i]);
|
||||
vk_free2(&device->vk.alloc, pAllocator, device->queues);
|
||||
|
||||
if (device->noop_job)
|
||||
v3dv_job_destroy(device->noop_job);
|
||||
|
|
@ -2049,6 +2073,7 @@ v3dv_DestroyDevice(VkDevice _device,
|
|||
|
||||
cnd_destroy(&device->query_ended);
|
||||
mtx_destroy(&device->query_mutex);
|
||||
mtx_destroy(&device->queue_mutex);
|
||||
|
||||
vk_device_finish(&device->vk);
|
||||
vk_free2(&device->vk.alloc, pAllocator, device);
|
||||
|
|
@ -2258,8 +2283,11 @@ free_memory(struct v3dv_device *device,
|
|||
if (mem->bo->map)
|
||||
device_unmap(device, mem);
|
||||
|
||||
if (mem->is_for_device_address)
|
||||
if (mem->is_for_device_address) {
|
||||
mtx_lock(&device->queue_mutex);
|
||||
device_remove_device_address_bo(device, mem->bo);
|
||||
mtx_unlock(&device->queue_mutex);
|
||||
}
|
||||
|
||||
device_free(device, mem);
|
||||
|
||||
|
|
|
|||
|
|
@ -204,11 +204,6 @@ struct v3dv_queue {
|
|||
struct v3dv_device *device;
|
||||
|
||||
struct v3dv_last_job_sync last_job_syncs;
|
||||
|
||||
/* The last active perfmon ID to prevent mixing of counter results when a
|
||||
* job is submitted with a different perfmon id.
|
||||
*/
|
||||
uint32_t last_perfmon_id;
|
||||
};
|
||||
|
||||
VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
|
||||
|
|
@ -253,10 +248,21 @@ struct v3dv_device {
|
|||
struct v3dv_physical_device *pdevice;
|
||||
|
||||
struct v3d_device_info devinfo;
|
||||
struct v3dv_queue queue;
|
||||
struct v3dv_queue *queues;
|
||||
uint32_t queue_count;
|
||||
|
||||
/* In cases where we instantiate more than one queue (Android), this protects
|
||||
* against concurrent access from multiple queues.
|
||||
*/
|
||||
mtx_t queue_mutex;
|
||||
|
||||
struct v3dv_job *noop_job;
|
||||
|
||||
/* The last active perfmon ID to prevent mixing of counter results when a
|
||||
* job is submitted with a different perfmon id.
|
||||
*/
|
||||
uint32_t last_perfmon_id;
|
||||
|
||||
/* Guards query->maybe_available and value for timestamps */
|
||||
mtx_t query_mutex;
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,14 @@
|
|||
#define V3DV_LIMITS_H
|
||||
|
||||
#include "drm-uapi/v3d_drm.h"
|
||||
#include "util/detect_os.h"
|
||||
|
||||
/* Upper bound on the number of queues the driver advertises in its queue
 * family (queueCount) and accepts at device creation. Android builds get
 * several emulated queues; every other platform keeps a single queue.
 */
#if DETECT_OS_ANDROID
|
||||
#define V3DV_MAX_QUEUES 4
|
||||
#else
|
||||
#define V3DV_MAX_QUEUES 1
|
||||
#endif
|
||||
|
||||
|
||||
/* From vulkan spec "If the multiple viewports feature is not enabled,
|
||||
* scissorCount must be 1", ditto for viewportCount. For now we don't support
|
||||
|
|
|
|||
|
|
@ -258,14 +258,14 @@ set_multisync(struct drm_v3d_multi_sync *ms,
|
|||
struct vk_sync_wait *waits,
|
||||
unsigned wait_count,
|
||||
struct drm_v3d_extension *next,
|
||||
struct v3dv_device *device,
|
||||
struct v3dv_queue *queue,
|
||||
struct v3dv_job *job,
|
||||
enum v3dv_queue_type in_queue_sync,
|
||||
enum v3dv_queue_type out_queue_sync,
|
||||
enum v3d_queue wait_stage,
|
||||
bool signal_syncs)
|
||||
{
|
||||
struct v3dv_queue *queue = &device->queue;
|
||||
struct v3dv_device *device = queue->device;
|
||||
uint32_t out_sync_count = 0, in_sync_count = 0;
|
||||
struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL;
|
||||
|
||||
|
|
@ -339,7 +339,7 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue,
|
|||
|
||||
reset.syncs = (uintptr_t)(void *)syncs;
|
||||
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)&reset, device, job,
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)&reset, queue, job,
|
||||
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
|
||||
if (!ms.base.id) {
|
||||
free(syncs);
|
||||
|
|
@ -381,7 +381,7 @@ handle_reset_query_cpu_job(struct v3dv_queue *queue,
|
|||
reset.syncs = (uintptr_t)(void *)syncs;
|
||||
reset.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids;
|
||||
|
||||
set_multisync(&ms, sync_info, waits, wait_count, (void *)&reset, device, job,
|
||||
set_multisync(&ms, sync_info, waits, wait_count, (void *)&reset, queue, job,
|
||||
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
|
||||
if (!ms.base.id) {
|
||||
free(syncs);
|
||||
|
|
@ -481,7 +481,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
|
|||
|
||||
if (err) {
|
||||
close(*fd);
|
||||
return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
|
||||
return vk_errorf(queue, VK_ERROR_UNKNOWN,
|
||||
"sync file export failed: %m");
|
||||
}
|
||||
|
||||
|
|
@ -490,7 +490,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
|
|||
if (err) {
|
||||
close(tmp_fd);
|
||||
close(*fd);
|
||||
return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
|
||||
return vk_errorf(queue, VK_ERROR_UNKNOWN,
|
||||
"failed to accumulate sync files: %m");
|
||||
}
|
||||
}
|
||||
|
|
@ -499,7 +499,7 @@ export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int
|
|||
}
|
||||
|
||||
static VkResult
|
||||
handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
|
||||
handle_end_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, uint32_t counter_pass_idx)
|
||||
{
|
||||
MESA_TRACE_FUNC();
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
|
@ -507,7 +507,6 @@ handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
|
|||
mtx_lock(&job->device->query_mutex);
|
||||
|
||||
struct v3dv_end_query_info *info = &job->cpu.query_end;
|
||||
struct v3dv_queue *queue = &job->device->queue;
|
||||
|
||||
int err = 0;
|
||||
int fd = -1;
|
||||
|
|
@ -611,7 +610,7 @@ handle_copy_query_results_cpu_job(struct v3dv_queue *queue,
|
|||
copy.offsets = (uintptr_t)(void *)offsets;
|
||||
copy.syncs = (uintptr_t)(void *)syncs;
|
||||
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)©, device, job,
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)©, queue, job,
|
||||
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
|
||||
if (!ms.base.id) {
|
||||
free(bo_handles);
|
||||
|
|
@ -668,7 +667,7 @@ handle_copy_query_results_cpu_job(struct v3dv_queue *queue,
|
|||
copy.syncs = (uintptr_t)(void *)syncs;
|
||||
copy.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids;
|
||||
|
||||
set_multisync(&ms, sync_info, waits, wait_count, (void *)©, device, job,
|
||||
set_multisync(&ms, sync_info, waits, wait_count, (void *)©, queue, job,
|
||||
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
|
||||
if (!ms.base.id) {
|
||||
free(kperfmon_ids);
|
||||
|
|
@ -796,7 +795,7 @@ handle_timestamp_query_cpu_job(struct v3dv_queue *queue,
|
|||
*/
|
||||
job->serialize = V3DV_BARRIER_ALL;
|
||||
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)×tamp, device, job,
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)×tamp, queue, job,
|
||||
V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs);
|
||||
if (!ms.base.id) {
|
||||
free(offsets);
|
||||
|
|
@ -901,7 +900,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
|
|||
* CSD job, as the CPU job must obey the CSD job synchronization
|
||||
* demands, such as barriers.
|
||||
*/
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)&indirect, device, csd_job,
|
||||
set_multisync(&ms, sync_info, NULL, 0, (void *)&indirect, queue, csd_job,
|
||||
V3DV_QUEUE_CPU, V3DV_QUEUE_CSD, V3D_CPU, signal_syncs);
|
||||
if (!ms.base.id)
|
||||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
|
@ -924,6 +923,22 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* Adds every BO in the device's device_address_bo_list to the job, but only
 * when the job is flagged as using buffer device addresses; otherwise this
 * is a no-op.
 *
 * The list belongs to the device (reached through queue->device), not to the
 * queue, so it is walked while holding device->queue_mutex. free_memory()
 * takes the same mutex around removing a BO from this list.
 */
static inline void
|
||||
job_add_device_address_bos(struct v3dv_job *job, struct v3dv_queue *queue)
|
||||
{
|
||||
if (!job->uses_buffer_device_address)
|
||||
return;
|
||||

||||
struct v3dv_device *device = queue->device;
|
||||

||||
mtx_lock(&device->queue_mutex);
|
||||
util_dynarray_foreach(&device->device_address_bo_list,
|
||||
struct v3dv_bo *, bo) {
|
||||
v3dv_job_add_bo(job, *bo);
|
||||
}
|
||||
mtx_unlock(&device->queue_mutex);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
handle_cl_job(struct v3dv_queue *queue,
|
||||
struct v3dv_job *job,
|
||||
|
|
@ -966,12 +981,7 @@ handle_cl_job(struct v3dv_queue *queue,
|
|||
* buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
|
||||
* are included.
|
||||
*/
|
||||
if (job->uses_buffer_device_address) {
|
||||
util_dynarray_foreach(&queue->device->device_address_bo_list,
|
||||
struct v3dv_bo *, bo) {
|
||||
v3dv_job_add_bo(job, *bo);
|
||||
}
|
||||
}
|
||||
job_add_device_address_bos(job, queue);
|
||||
|
||||
submit.bo_handle_count = job->bo_count;
|
||||
uint32_t *bo_handles =
|
||||
|
|
@ -986,8 +996,10 @@ handle_cl_job(struct v3dv_queue *queue,
|
|||
|
||||
submit.perfmon_id = job->perf ?
|
||||
job->perf->kperfmon_ids[counter_pass_idx] : 0;
|
||||
const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id;
|
||||
queue->last_perfmon_id = submit.perfmon_id;
|
||||
mtx_lock(&device->queue_mutex);
|
||||
const bool needs_perf_sync = device->last_perfmon_id != submit.perfmon_id;
|
||||
device->last_perfmon_id = submit.perfmon_id;
|
||||
mtx_unlock(&device->queue_mutex);
|
||||
|
||||
/* We need a binning sync if we are the first CL job waiting on a semaphore
|
||||
* with a wait stage that involves the geometry pipeline, or if the job
|
||||
|
|
@ -1026,7 +1038,7 @@ handle_cl_job(struct v3dv_queue *queue,
|
|||
*/
|
||||
struct drm_v3d_multi_sync ms = { 0 };
|
||||
enum v3d_queue wait_stage = needs_rcl_sync ? V3D_RENDER : V3D_BIN;
|
||||
set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
|
||||
set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
|
||||
V3DV_QUEUE_CL, V3DV_QUEUE_CL, wait_stage, signal_syncs);
|
||||
if (!ms.base.id) {
|
||||
free(bo_handles);
|
||||
|
|
@ -1078,7 +1090,7 @@ handle_tfu_job(struct v3dv_queue *queue,
|
|||
* multiple semaphore extension.
|
||||
*/
|
||||
struct drm_v3d_multi_sync ms = { 0 };
|
||||
set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
|
||||
set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
|
||||
V3DV_QUEUE_TFU, V3DV_QUEUE_TFU, V3D_TFU, signal_syncs);
|
||||
if (!ms.base.id)
|
||||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
|
@ -1118,12 +1130,7 @@ handle_csd_job(struct v3dv_queue *queue,
|
|||
* buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
|
||||
* are included.
|
||||
*/
|
||||
if (job->uses_buffer_device_address) {
|
||||
util_dynarray_foreach(&queue->device->device_address_bo_list,
|
||||
struct v3dv_bo *, bo) {
|
||||
v3dv_job_add_bo(job, *bo);
|
||||
}
|
||||
}
|
||||
job_add_device_address_bos(job, queue);
|
||||
|
||||
submit->bo_handle_count = job->bo_count;
|
||||
uint32_t *bo_handles =
|
||||
|
|
@ -1140,7 +1147,7 @@ handle_csd_job(struct v3dv_queue *queue,
|
|||
* multiple semaphore extension.
|
||||
*/
|
||||
struct drm_v3d_multi_sync ms = { 0 };
|
||||
set_multisync(&ms, sync_info, NULL, 0, NULL, device, job,
|
||||
set_multisync(&ms, sync_info, NULL, 0, NULL, queue, job,
|
||||
V3DV_QUEUE_CSD, V3DV_QUEUE_CSD, V3D_CSD, signal_syncs);
|
||||
if (!ms.base.id)
|
||||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
|
@ -1154,7 +1161,9 @@ handle_csd_job(struct v3dv_queue *queue,
|
|||
|
||||
submit->perfmon_id = job->perf ?
|
||||
job->perf->kperfmon_ids[counter_pass_idx] : 0;
|
||||
queue->last_perfmon_id = submit->perfmon_id;
|
||||
mtx_lock(&device->queue_mutex);
|
||||
device->last_perfmon_id = submit->perfmon_id;
|
||||
mtx_unlock(&device->queue_mutex);
|
||||
|
||||
int ret = v3d_ioctl(device->pdevice->render_fd,
|
||||
DRM_IOCTL_V3D_SUBMIT_CSD, submit);
|
||||
|
|
@ -1220,7 +1229,7 @@ queue_handle_job(struct v3dv_queue *queue,
|
|||
case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
|
||||
return handle_reset_query_cpu_job(queue, job, sync_info, signal_syncs);
|
||||
case V3DV_JOB_TYPE_CPU_END_QUERY:
|
||||
return handle_end_query_cpu_job(job, counter_pass_idx);
|
||||
return handle_end_query_cpu_job(queue, job, counter_pass_idx);
|
||||
case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
|
||||
return handle_copy_query_results_cpu_job(queue, job, sync_info, signal_syncs);
|
||||
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue