venus: allow renderer submissions on per-context virtgpu rings

For submissions to renderers that support multiple timelines, put
them on the virtgpu fencing timeline (dma fence context) specified by
the VkQueue's bound ring_idx. CPU-sync'd renderer submissions
can be sent in the same manner by using ring_idx = 0.

Signed-off-by: Ryan Neph <ryanneph@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19691>
Authored by Ryan Neph on 2022-08-09 17:12:07 -07:00, committed by Marge Bot
parent 9bba5032c2
commit 3beb4f055e
5 changed files with 20 additions and 17 deletions


@@ -86,20 +86,18 @@ struct vn_renderer_submit_batch {
    size_t cs_size;
 
    /*
-    * Submit cs to the virtual sync queue identified by sync_queue_index. The
-    * virtual queue is assumed to be associated with the physical VkQueue
-    * identified by vk_queue_id. After the execution completes on the
-    * VkQueue, the virtual sync queue is signaled.
+    * Submit cs to the timeline identified by ring_idx. A timeline is
+    * typically associated with a physical VkQueue and bound to the ring_idx
+    * during VkQueue creation. After execution completes on the VkQueue, the
+    * timeline sync point is signaled.
     *
-    * sync_queue_index must be less than max_sync_queue_count.
-    *
-    * vk_queue_id specifies the object id of a VkQueue.
-    *
-    * When sync_queue_cpu is true, it specifies the special CPU sync queue,
-    * and sync_queue_index/vk_queue_id are ignored. TODO revisit this later
+    * ring_idx 0 is reserved for the context-specific CPU timeline. sync
+    * points on the CPU timeline are signaled immediately after command
+    * processing by the renderer.
     */
-   uint32_t sync_queue_index;
-   bool sync_queue_cpu;
+   uint32_t ring_idx;
+
+   /* TODO remove once vtest supports multiple timelines */
    vn_object_id vk_queue_id;
 
    /* syncs to update when the virtual sync queue is signaled */
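A minimal caller-side sketch of the new routing, using the vn_renderer_submit_batch fields above. The example_queue type and example_submit() helper are hypothetical stand-ins for the driver's real queue bookkeeping, and vn_renderer_submit() is assumed to be the usual thin wrapper over the renderer's submit op:

/* Hypothetical sketch, not part of this commit: a GPU submission
 * forwards the ring_idx bound at VkQueue creation, while passing
 * ring_idx = 0 instead targets the context's CPU timeline.
 */
struct example_queue {
   uint32_t ring_idx;  /* bound during VkQueue creation; non-zero for GPU */
   vn_object_id id;    /* object id of the backing VkQueue */
};

static VkResult
example_submit(struct vn_renderer *renderer,
               const struct example_queue *queue,
               const void *cs_data, size_t cs_size)
{
   const struct vn_renderer_submit_batch batch = {
      .cs_data = cs_data,
      .cs_size = cs_size,
      .ring_idx = queue->ring_idx,
      /* still needed by vtest; see the TODO above */
      .vk_queue_id = queue->id,
   };
   const struct vn_renderer_submit submit = {
      .batches = &batch,
      .batch_count = 1,
   };
   return vn_renderer_submit(renderer, &submit);
}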


@@ -21,7 +21,7 @@ vn_renderer_submit_simple_sync(struct vn_renderer *renderer,
          &(const struct vn_renderer_submit_batch){
             .cs_data = cs_data,
             .cs_size = cs_size,
-            .sync_queue_cpu = true,
+            .ring_idx = 0, /* CPU ring */
             .syncs = &sync,
             .sync_values = &(const uint64_t){ 1 },
             .sync_count = 1,


@@ -27,6 +27,7 @@ vn_renderer_submit_simple(struct vn_renderer *renderer,
          &(const struct vn_renderer_submit_batch){
             .cs_data = cs_data,
             .cs_size = cs_size,
+            .ring_idx = 0, /* CPU ring */
          },
       .batch_count = 1,
    };


@@ -509,6 +509,8 @@ sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
 static int
 sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
 {
+   const bool use_ring_idx = gpu->base.info.supports_multiple_timelines;
+
    /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
    uint32_t *gem_handles = NULL;
    if (submit->bo_count) {
@@ -523,11 +525,13 @@ sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
       const struct vn_renderer_submit_batch *batch = &submit->batches[i];
 
       struct drm_virtgpu_execbuffer args = {
-         .flags = batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0,
+         .flags = (batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0) |
+                  (use_ring_idx ? VIRTGPU_EXECBUF_RING_IDX : 0),
          .size = batch->cs_size,
          .command = (uintptr_t)batch->cs_data,
          .bo_handles = (uintptr_t)gem_handles,
          .num_bo_handles = submit->bo_count,
+         .ring_idx = (use_ring_idx ? batch->ring_idx : 0),
       };
 
       ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
@@ -539,7 +543,7 @@ sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
       if (batch->sync_count) {
          ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
                                        batch->sync_values, batch->sync_count,
-                                       batch->sync_queue_cpu);
+                                       batch->ring_idx == 0);
          close(args.fence_fd);
          if (ret)
            break;
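On the kernel side, the per-context rings that use_ring_idx selects come from the virtgpu UAPI: a context is created with a ring count, and each execbuffer then names its target ring. Below is a simplified sketch against include/uapi/drm/virtgpu_drm.h with error handling elided; venus's real context initialization also sets other parameters (e.g. the capset id), so treat this as an illustration of the ioctl pattern, not the driver's code:

#include <stdint.h>
#include <xf86drm.h>
#include "drm-uapi/virtgpu_drm.h"

/* Create a virtgpu context that exposes num_rings fence timelines. */
static int
example_init_rings(int fd, uint64_t num_rings)
{
   struct drm_virtgpu_context_set_param param = {
      .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
      .value = num_rings,
   };
   struct drm_virtgpu_context_init args = {
      .num_params = 1,
      .ctx_set_params = (uintptr_t)&param,
   };
   return drmIoctl(fd, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
}

/* Submit a command stream on one ring; fences signal in submission
 * order within a ring, independently of the other rings.
 */
static int
example_submit_on_ring(int fd, const void *cs, uint32_t cs_size,
                       uint32_t ring_idx)
{
   struct drm_virtgpu_execbuffer args = {
      .flags = VIRTGPU_EXECBUF_RING_IDX,
      .size = cs_size,
      .command = (uintptr_t)cs,
      .ring_idx = ring_idx, /* 0 is venus's CPU timeline */
   };
   return drmIoctl(fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
}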


@@ -540,9 +540,9 @@ vtest_vcmd_submit_cmd2(struct vtest *vtest,
          .sync_offset = sync_offset / sizeof(uint32_t),
          .sync_count = batch->sync_count,
       };
-      if (!batch->sync_queue_cpu) {
+      if (vtest->base.info.supports_multiple_timelines) {
          dst.flags = VCMD_SUBMIT_CMD2_FLAG_SYNC_QUEUE;
-         dst.sync_queue_index = batch->sync_queue_index;
+         dst.sync_queue_index = batch->ring_idx;
          dst.sync_queue_id = batch->vk_queue_id;
       }
       vtest_write(vtest, &dst, sizeof(dst));