venus: stop using vn_renderer_sync in vn_semaphore

Move away from vn_renderer_sync and toward a userspace-only solution
temporarily until the kernel does what we need.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Yiwei Zhang <zzyiwei@chromium.org>
Reviewed-by: Ryan Neph <ryanneph@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10146>
This commit is contained in:
Chia-I Wu 2021-04-01 15:22:17 -07:00 committed by Marge Bot
parent 4ffb0265cc
commit 6d2454ad3c
2 changed files with 130 additions and 289 deletions

View file

@ -1384,29 +1384,26 @@ static void
vn_physical_device_init_external_semaphore_handles(
struct vn_physical_device *physical_dev)
{
/* In the current model, it is not possible to support external semaphores.
* At least an external semaphore cannot be waited on GPU in the host but
* can only be waited on CPU in the guest.
/* The current code manipulates the host-side VkSemaphore directly. It
* works very well for binary semaphores because there is no CPU operation.
* But for timeline semaphores, the situation is similar to that of fences.
* vkWaitSemaphores is translated to repeated vkGetSemaphoreCounterValue.
*
* A binary vn_semaphore is implemented solely on top of a host-side binary
* VkSemaphore. There is no CPU operation against binary semaphores and
* there is no need for vn_renderer_sync.
* External semaphore is not possible currently. We could cheat when the
* semaphore is binary and the handle type is sync file, but that would
* require associating a fence with the semaphore and doing vkWaitForFences
* in vkGetSemaphoreFdKHR.
*
* A timeline vn_semaphore is implemented on top of both a host-side
* timeline VkSemaphore and a vn_renderer_sync. Whenever a timeline
* vn_semaphore is updated, we make sure both the host-side timeline
* VkSemaphore and the vn_renderer_sync are updated. This allows us to use
* whichever is more convenient depending on the operations: the host-side
* timeline VkSemaphore for GPU waits and the vn_renderer_sync for CPU
* waits/gets.
*
* To support external semaphores, we should create a vn_renderer_sync from
* a host-side VkSemaphore instead, similar to how a vn_renderer_bo is
* created from a host-side VkDeviceMemory. The reasons to make a similar
* move for fences apply to timeline semaphores as well. Besides, the
* external handle (drm_syncobj or sync file) needs to carry the necessary
* information to identify the host-side semaphore.
* We would like to create a vn_renderer_sync from a host-side VkSemaphore,
* similar to how a vn_renderer_bo is created from a host-side
* VkDeviceMemory. The reasoning is the same as that for fences.
* Additionally, we would like the sync file exported from the
* vn_renderer_sync to carry the necessary information to identify the
* host-side VkSemaphore. That would allow the consumers to wait on the
* host side rather than the guest side.
*/
physical_dev->external_binary_semaphore_handles = 0;
physical_dev->external_timeline_semaphore_handles = 0;
}
static void

View file

@ -75,9 +75,6 @@ struct vn_queue_submission {
uint32_t wait_semaphore_count;
uint32_t wait_wsi_count;
uint32_t signal_semaphore_count;
uint32_t signal_device_only_count;
uint32_t signal_timeline_count;
uint32_t sync_count;
@ -103,16 +100,12 @@ vn_queue_submission_count_semaphores(struct vn_queue_submission *submit)
{
submit->wait_semaphore_count = 0;
submit->wait_wsi_count = 0;
submit->signal_semaphore_count = 0;
submit->signal_device_only_count = 0;
submit->signal_timeline_count = 0;
switch (submit->batch_type) {
case VK_STRUCTURE_TYPE_SUBMIT_INFO:
for (uint32_t i = 0; i < submit->batch_count; i++) {
const VkSubmitInfo *batch = &submit->submit_batches[i];
submit->wait_semaphore_count += batch->waitSemaphoreCount;
submit->signal_semaphore_count += batch->signalSemaphoreCount;
for (uint32_t j = 0; j < batch->waitSemaphoreCount; j++) {
struct vn_semaphore *sem =
@ -122,21 +115,6 @@ vn_queue_submission_count_semaphores(struct vn_queue_submission *submit)
if (payload->type == VN_SYNC_TYPE_WSI_SIGNALED)
submit->wait_wsi_count++;
}
for (uint32_t j = 0; j < batch->signalSemaphoreCount; j++) {
struct vn_semaphore *sem =
vn_semaphore_from_handle(batch->pSignalSemaphores[j]);
const struct vn_sync_payload *payload = sem->payload;
/* it must be one of the waited semaphores and will be reset */
if (payload->type == VN_SYNC_TYPE_WSI_SIGNALED)
payload = &sem->permanent;
if (payload->type == VN_SYNC_TYPE_DEVICE_ONLY)
submit->signal_device_only_count++;
else if (sem->type == VK_SEMAPHORE_TYPE_TIMELINE)
submit->signal_timeline_count++;
}
}
break;
case VK_STRUCTURE_TYPE_BIND_SPARSE_INFO:
@ -144,7 +122,6 @@ vn_queue_submission_count_semaphores(struct vn_queue_submission *submit)
const VkBindSparseInfo *batch = &submit->bind_sparse_batches[i];
submit->wait_semaphore_count += batch->waitSemaphoreCount;
submit->signal_semaphore_count += batch->signalSemaphoreCount;
for (uint32_t j = 0; j < batch->waitSemaphoreCount; j++) {
struct vn_semaphore *sem =
@ -154,17 +131,6 @@ vn_queue_submission_count_semaphores(struct vn_queue_submission *submit)
if (payload->type == VN_SYNC_TYPE_WSI_SIGNALED)
submit->wait_wsi_count++;
}
for (uint32_t j = 0; j < batch->signalSemaphoreCount; j++) {
struct vn_semaphore *sem =
vn_semaphore_from_handle(batch->pSignalSemaphores[j]);
const struct vn_sync_payload *payload = sem->payload;
if (payload->type == VN_SYNC_TYPE_DEVICE_ONLY)
submit->signal_device_only_count++;
else if (sem->type == VK_SEMAPHORE_TYPE_TIMELINE)
submit->signal_timeline_count++;
}
}
break;
default:
@ -172,8 +138,7 @@ vn_queue_submission_count_semaphores(struct vn_queue_submission *submit)
break;
}
submit->sync_count =
submit->signal_semaphore_count - submit->signal_device_only_count;
submit->sync_count = 0;
}
static VkResult
@ -305,55 +270,8 @@ vn_queue_submission_setup_batch_syncs(struct vn_queue_submission *submit,
uint32_t batch_index,
uint32_t sync_base)
{
union {
const VkSubmitInfo *submit_batch;
const VkBindSparseInfo *bind_sparse_batch;
} u;
const VkTimelineSemaphoreSubmitInfo *timeline;
const VkSemaphore *sems;
uint32_t sem_count;
switch (submit->batch_type) {
case VK_STRUCTURE_TYPE_SUBMIT_INFO:
u.submit_batch = &submit->submit_batches[batch_index];
timeline = vk_find_struct_const(u.submit_batch->pNext,
TIMELINE_SEMAPHORE_SUBMIT_INFO);
sems = u.submit_batch->pSignalSemaphores;
sem_count = u.submit_batch->signalSemaphoreCount;
break;
case VK_STRUCTURE_TYPE_BIND_SPARSE_INFO:
u.bind_sparse_batch = &submit->bind_sparse_batches[batch_index];
timeline = vk_find_struct_const(u.bind_sparse_batch->pNext,
TIMELINE_SEMAPHORE_SUBMIT_INFO);
sems = u.bind_sparse_batch->pSignalSemaphores;
sem_count = u.bind_sparse_batch->signalSemaphoreCount;
break;
default:
unreachable("unexpected batch type");
break;
}
struct vn_renderer_sync **syncs = &submit->temp.syncs[sync_base];
uint64_t *sync_values = &submit->temp.sync_values[sync_base];
uint32_t sync_count = 0;
for (uint32_t i = 0; i < sem_count; i++) {
struct vn_semaphore *sem = vn_semaphore_from_handle(sems[i]);
const struct vn_sync_payload *payload = sem->payload;
if (payload->type == VN_SYNC_TYPE_DEVICE_ONLY)
continue;
assert(payload->type == VN_SYNC_TYPE_SYNC);
syncs[sync_count] = payload->sync;
sync_values[sync_count] = sem->type == VK_SEMAPHORE_TYPE_TIMELINE
? timeline->pSignalSemaphoreValues[i]
: 1;
sync_count++;
}
submit->temp.batch_sync_counts[batch_index] = sync_count;
return sync_count;
assert(!submit->sync_count);
return 0;
}
static void
@ -389,12 +307,8 @@ vn_queue_submission_setup_batches(struct vn_queue_submission *submit)
submit, i, wait_sem_base);
}
if (submit->signal_semaphore_count > submit->signal_device_only_count) {
sync_base +=
vn_queue_submission_setup_batch_syncs(submit, i, sync_base);
} else if (submit->sync_count) {
submit->temp.batch_sync_counts[i] = 0;
}
sync_base +=
vn_queue_submission_setup_batch_syncs(submit, i, sync_base);
}
assert(sync_base == submit->sync_count);
@ -509,48 +423,18 @@ vn_QueueSubmit(VkQueue _queue,
}
}
/* TODO this should be one trip to the renderer */
if (submit.signal_timeline_count) {
uint32_t sync_base = 0;
for (uint32_t i = 0; i < submit.batch_count - 1; i++) {
vn_async_vkQueueSubmit(dev->instance, submit.queue, 1,
&submit.submit_batches[i], VK_NULL_HANDLE);
vn_instance_ring_wait(dev->instance);
result =
vn_call_vkQueueSubmit(dev->instance, submit.queue, submit.batch_count,
submit.submit_batches, submit.fence);
if (result != VK_SUCCESS) {
vn_queue_submission_cleanup(&submit);
return vn_error(dev->instance, result);
}
vn_queue_submit_syncs(queue, &submit.temp.syncs[sync_base],
&submit.temp.sync_values[sync_base],
submit.temp.batch_sync_counts[i], NULL);
sync_base += submit.temp.batch_sync_counts[i];
}
result = vn_call_vkQueueSubmit(
dev->instance, submit.queue, 1,
&submit.submit_batches[submit.batch_count - 1], submit.fence);
if (result != VK_SUCCESS) {
vn_queue_submission_cleanup(&submit);
return vn_error(dev->instance, result);
}
if (sync_base < submit.sync_count || wsi_mem) {
vn_queue_submit_syncs(queue, &submit.temp.syncs[sync_base],
&submit.temp.sync_values[sync_base],
submit.sync_count - sync_base,
wsi_mem ? wsi_mem->base_bo : NULL);
}
} else {
result = vn_call_vkQueueSubmit(dev->instance, submit.queue,
submit.batch_count,
submit.submit_batches, submit.fence);
if (result != VK_SUCCESS) {
vn_queue_submission_cleanup(&submit);
return vn_error(dev->instance, result);
}
if (submit.sync_count || wsi_mem) {
vn_queue_submit_syncs(queue, submit.temp.syncs,
submit.temp.sync_values, submit.sync_count,
wsi_mem ? wsi_mem->base_bo : NULL);
}
if (submit.sync_count || wsi_mem) {
vn_queue_submit_syncs(queue, submit.temp.syncs, submit.temp.sync_values,
submit.sync_count,
wsi_mem ? wsi_mem->base_bo : NULL);
}
/* XXX The implicit fence won't work because the host is not aware of it.
@ -588,48 +472,17 @@ vn_QueueBindSparse(VkQueue _queue,
if (result != VK_SUCCESS)
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
/* TODO this should be one trip to the renderer */
if (submit.signal_timeline_count) {
uint32_t sync_base = 0;
for (uint32_t i = 0; i < submit.batch_count - 1; i++) {
vn_async_vkQueueBindSparse(dev->instance, submit.queue, 1,
&submit.bind_sparse_batches[i],
VK_NULL_HANDLE);
vn_instance_ring_wait(dev->instance);
result = vn_call_vkQueueBindSparse(
dev->instance, submit.queue, submit.batch_count,
submit.bind_sparse_batches, submit.fence);
if (result != VK_SUCCESS) {
vn_queue_submission_cleanup(&submit);
return vn_error(dev->instance, result);
}
vn_queue_submit_syncs(queue, &submit.temp.syncs[sync_base],
&submit.temp.sync_values[sync_base],
submit.temp.batch_sync_counts[i], NULL);
sync_base += submit.temp.batch_sync_counts[i];
}
result = vn_call_vkQueueBindSparse(
dev->instance, submit.queue, 1,
&submit.bind_sparse_batches[submit.batch_count - 1], submit.fence);
if (result != VK_SUCCESS) {
vn_queue_submission_cleanup(&submit);
return vn_error(dev->instance, result);
}
if (sync_base < submit.sync_count) {
vn_queue_submit_syncs(queue, &submit.temp.syncs[sync_base],
&submit.temp.sync_values[sync_base],
submit.sync_count - sync_base, NULL);
}
} else {
result = vn_call_vkQueueBindSparse(
dev->instance, submit.queue, submit.batch_count,
submit.bind_sparse_batches, submit.fence);
if (result != VK_SUCCESS) {
vn_queue_submission_cleanup(&submit);
return vn_error(dev->instance, result);
}
if (submit.sync_count) {
vn_queue_submit_syncs(queue, submit.temp.syncs,
submit.temp.sync_values, submit.sync_count,
NULL);
}
if (submit.sync_count) {
vn_queue_submit_syncs(queue, submit.temp.syncs, submit.temp.sync_values,
submit.sync_count, NULL);
}
vn_queue_submission_cleanup(&submit);
@ -954,15 +807,8 @@ vn_semaphore_init_payloads(struct vn_device *dev,
const VkAllocationCallbacks *alloc)
{
struct vn_renderer_sync *perm_sync;
VkResult result;
if (sem->type == VK_SEMAPHORE_TYPE_TIMELINE) {
result = vn_renderer_sync_create_semaphore(dev->instance->renderer,
VK_SEMAPHORE_TYPE_TIMELINE,
initial_val, 0, &perm_sync);
} else {
result =
vn_renderer_sync_create_empty(dev->instance->renderer, &perm_sync);
}
VkResult result =
vn_renderer_sync_create_empty(dev->instance->renderer, &perm_sync);
if (result != VK_SUCCESS)
return result;
@ -974,9 +820,8 @@ vn_semaphore_init_payloads(struct vn_device *dev,
return result;
}
sem->permanent.type = sem->type == VK_SEMAPHORE_TYPE_TIMELINE
? VN_SYNC_TYPE_SYNC
: VN_SYNC_TYPE_DEVICE_ONLY;
/* perm_sync is unused */
sem->permanent.type = VN_SYNC_TYPE_DEVICE_ONLY;
sem->permanent.sync = perm_sync;
/* temp_sync is uninitialized */
@ -995,8 +840,7 @@ vn_semaphore_reset_wsi(struct vn_device *dev, struct vn_semaphore *sem)
vn_sync_payload_release(dev, &sem->temporary);
if (perm->type == VN_SYNC_TYPE_SYNC)
vn_renderer_sync_reset(perm->sync, 0);
assert(perm->type != VN_SYNC_TYPE_SYNC);
sem->payload = perm;
}
@ -1081,11 +925,13 @@ vn_GetSemaphoreCounterValue(VkDevice device,
VkSemaphore semaphore,
uint64_t *pValue)
{
struct vn_device *dev = vn_device_from_handle(device);
struct vn_semaphore *sem = vn_semaphore_from_handle(semaphore);
struct vn_sync_payload *payload = sem->payload;
assert(payload->type == VN_SYNC_TYPE_SYNC);
return vn_renderer_sync_read(payload->sync, pValue);
assert(payload->type == VN_SYNC_TYPE_DEVICE_ONLY);
return vn_call_vkGetSemaphoreCounterValue(dev->instance, device, semaphore,
pValue);
}
VkResult
@ -1102,12 +948,48 @@ vn_SignalSemaphore(VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo)
else
vn_async_vkSignalSemaphore(dev->instance, device, pSignalInfo);
assert(payload->type == VN_SYNC_TYPE_SYNC);
vn_renderer_sync_write(payload->sync, pSignalInfo->value);
assert(payload->type != VN_SYNC_TYPE_SYNC);
return VK_SUCCESS;
}
/* Scan the semaphores in order and stop at the first one whose counter has
 * reached its wait value.
 *
 * semaphores and values are parallel arrays of count entries.
 *
 * Returns VK_SUCCESS when such a semaphore is found, the error from
 * vn_GetSemaphoreCounterValue if a query fails first, or VK_NOT_READY when
 * none of the semaphores has reached its wait value.
 */
static VkResult
vn_find_first_signaled_semaphore(VkDevice device,
                                 const VkSemaphore *semaphores,
                                 const uint64_t *values,
                                 uint32_t count)
{
   uint32_t idx = 0;

   while (idx < count) {
      uint64_t counter;
      const VkResult res =
         vn_GetSemaphoreCounterValue(device, semaphores[idx], &counter);

      /* a failed query ends the scan; counter is not meaningful then */
      if (res != VK_SUCCESS)
         return res;

      if (counter >= values[idx])
         return VK_SUCCESS;

      idx++;
   }

   return VK_NOT_READY;
}
/* Drop every semaphore whose counter has already reached its wait value.
 *
 * semaphores and values are parallel arrays of *count entries.  The entries
 * that are still pending are compacted to the front of BOTH arrays, keeping
 * their relative order, and *count is updated to the number of pending
 * entries.
 *
 * Returns VK_SUCCESS when nothing is pending anymore, VK_NOT_READY when at
 * least one semaphore is still pending, or the error from
 * vn_GetSemaphoreCounterValue.  On error, *count is left untouched and the
 * arrays may have been partially compacted.
 */
static VkResult
vn_remove_signaled_semaphores(VkDevice device,
                              VkSemaphore *semaphores,
                              uint64_t *values,
                              uint32_t *count)
{
   uint32_t cur = 0;

   for (uint32_t i = 0; i < *count; i++) {
      uint64_t val;
      VkResult result =
         vn_GetSemaphoreCounterValue(device, semaphores[i], &val);
      if (result != VK_SUCCESS)
         return result;

      if (val < values[i]) {
         /* Move the wait value together with its semaphore.  Compacting
          * only the semaphores would pair each survivor with the wait value
          * of a different, already-removed semaphore on the next pass.
          */
         semaphores[cur] = semaphores[i];
         values[cur] = values[i];
         cur++;
      }
   }

   *count = cur;
   return cur ? VK_NOT_READY : VK_SUCCESS;
}
VkResult
vn_WaitSemaphores(VkDevice device,
const VkSemaphoreWaitInfo *pWaitInfo,
@ -1116,36 +998,46 @@ vn_WaitSemaphores(VkDevice device,
struct vn_device *dev = vn_device_from_handle(device);
const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
struct vn_renderer_sync *local_syncs[8];
struct vn_renderer_sync **syncs = local_syncs;
if (pWaitInfo->semaphoreCount > ARRAY_SIZE(local_syncs)) {
syncs = vk_alloc(alloc, sizeof(*syncs) * pWaitInfo->semaphoreCount,
VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!syncs)
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
const int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
VkResult result = VK_NOT_READY;
uint32_t iter = 0;
if (pWaitInfo->semaphoreCount > 1 &&
!(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT)) {
uint32_t semaphore_count = pWaitInfo->semaphoreCount;
VkSemaphore local_semaphores[8];
uint64_t local_values[8];
VkSemaphore *semaphores = local_semaphores;
uint64_t *values = local_values;
if (semaphore_count > ARRAY_SIZE(local_semaphores)) {
semaphores = vk_alloc(
alloc, (sizeof(*semaphores) + sizeof(*values)) * semaphore_count,
VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!semaphores)
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
values = (uint64_t *)&semaphores[semaphore_count];
}
memcpy(semaphores, pWaitInfo->pSemaphores,
sizeof(*semaphores) * semaphore_count);
memcpy(values, pWaitInfo->pValues, sizeof(*values) * semaphore_count);
while (result == VK_NOT_READY) {
result = vn_remove_signaled_semaphores(device, semaphores, values,
&semaphore_count);
result = vn_update_sync_result(result, abs_timeout, &iter);
}
if (semaphores != local_semaphores)
vk_free(alloc, semaphores);
} else {
while (result == VK_NOT_READY) {
result = vn_find_first_signaled_semaphore(
device, pWaitInfo->pSemaphores, pWaitInfo->pValues,
pWaitInfo->semaphoreCount);
result = vn_update_sync_result(result, abs_timeout, &iter);
}
}
for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
struct vn_semaphore *sem =
vn_semaphore_from_handle(pWaitInfo->pSemaphores[i]);
const struct vn_sync_payload *payload = sem->payload;
assert(payload->type == VN_SYNC_TYPE_SYNC);
syncs[i] = payload->sync;
}
const struct vn_renderer_wait wait = {
.wait_any = pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT,
.timeout = timeout,
.syncs = syncs,
.sync_values = pWaitInfo->pValues,
.sync_count = pWaitInfo->semaphoreCount,
};
VkResult result = vn_renderer_wait(dev->instance->renderer, &wait);
if (syncs != local_syncs)
vk_free(alloc, syncs);
return vn_result(dev->instance, result);
}
@ -1154,37 +1046,7 @@ vn_ImportSemaphoreFdKHR(
VkDevice device, const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
struct vn_device *dev = vn_device_from_handle(device);
struct vn_semaphore *sem =
vn_semaphore_from_handle(pImportSemaphoreFdInfo->semaphore);
const bool sync_file = pImportSemaphoreFdInfo->handleType ==
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
const int fd = pImportSemaphoreFdInfo->fd;
struct vn_sync_payload *payload =
pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT
? &sem->temporary
: &sem->permanent;
if (payload->type == VN_SYNC_TYPE_SYNC)
vn_renderer_sync_release(payload->sync);
VkResult result;
if (sync_file && fd < 0)
result = vn_renderer_sync_init_signaled(payload->sync);
else
result = vn_renderer_sync_init_syncobj(payload->sync, fd, sync_file);
if (result != VK_SUCCESS)
return vn_error(dev->instance, result);
/* TODO import into the host-side semaphore */
payload->type = VN_SYNC_TYPE_SYNC;
sem->payload = payload;
if (fd >= 0)
close(fd);
return VK_SUCCESS;
return vn_error(dev->instance, VK_ERROR_UNKNOWN);
}
VkResult
@ -1193,25 +1055,7 @@ vn_GetSemaphoreFdKHR(VkDevice device,
int *pFd)
{
struct vn_device *dev = vn_device_from_handle(device);
struct vn_semaphore *sem = vn_semaphore_from_handle(pGetFdInfo->semaphore);
const bool sync_file =
pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
struct vn_sync_payload *payload = sem->payload;
assert(payload->type == VN_SYNC_TYPE_SYNC);
int fd = vn_renderer_sync_export_syncobj(payload->sync, sync_file);
if (fd < 0)
return vn_error(dev->instance, VK_ERROR_TOO_MANY_OBJECTS);
if (sync_file) {
vn_sync_payload_release(dev, &sem->temporary);
vn_renderer_sync_reset(sem->permanent.sync, 0);
sem->payload = &sem->permanent;
/* TODO reset the host-side semaphore */
}
*pFd = fd;
return VK_SUCCESS;
return vn_error(dev->instance, VK_ERROR_UNKNOWN);
}
/* event commands */