wsi: Add common infrastructure for explicit sync

Signed-off-by: Joshua Ashton <joshua@froggi.es>

Reviewed-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Reviewed-by: Erik Kurzinger <ekurzinger@nvidia.com>
Reviewed-by: Sebastian Wick <sebastian.wick@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25709>
This commit is contained in:
Joshua Ashton 2023-10-12 19:03:28 +01:00 committed by Marge Bot
parent 4bf8f96b3e
commit 59813ae468
3 changed files with 466 additions and 34 deletions

View file

@ -716,6 +716,8 @@ wsi_create_image(const struct wsi_swapchain *chain,
#ifndef _WIN32
image->dma_buf_fd = -1;
for (uint32_t i = 0; i < WSI_ES_COUNT; i++)
image->explicit_sync[i].fd = -1;
#endif
result = wsi->CreateImage(chain->device, &info->create,
@ -738,6 +740,17 @@ wsi_create_image(const struct wsi_swapchain *chain,
goto fail;
}
if (info->explicit_sync) {
#if HAVE_LIBDRM
result = wsi_create_image_explicit_sync_drm(chain, image);
if (result != VK_SUCCESS)
goto fail;
#else
result = VK_ERROR_FEATURE_NOT_PRESENT;
goto fail;
#endif
}
return VK_SUCCESS;
fail:
@ -756,6 +769,12 @@ wsi_destroy_image(const struct wsi_swapchain *chain,
close(image->dma_buf_fd);
#endif
if (image->explicit_sync[WSI_ES_ACQUIRE].semaphore) {
#if HAVE_LIBDRM
wsi_destroy_image_explicit_sync_drm(chain, image);
#endif
}
if (image->cpu_map != NULL) {
wsi->UnmapMemory(chain->device, image->blit.buffer != VK_NULL_HANDLE ?
image->blit.memory : image->memory);
@ -1162,9 +1181,13 @@ wsi_signal_semaphore_for_image(struct vk_device *device,
vk_semaphore_reset_temporary(device, semaphore);
#ifdef HAVE_LIBDRM
VkResult result = wsi_create_sync_for_dma_buf_wait(chain, image,
VK_SYNC_FEATURE_GPU_WAIT,
&semaphore->temporary);
VkResult result = chain->image_info.explicit_sync ?
wsi_create_sync_for_image_syncobj(chain, image,
VK_SYNC_FEATURE_GPU_WAIT,
&semaphore->temporary) :
wsi_create_sync_for_dma_buf_wait(chain, image,
VK_SYNC_FEATURE_GPU_WAIT,
&semaphore->temporary);
if (result != VK_ERROR_FEATURE_NOT_PRESENT)
return result;
#endif
@ -1194,9 +1217,13 @@ wsi_signal_fence_for_image(struct vk_device *device,
vk_fence_reset_temporary(device, fence);
#ifdef HAVE_LIBDRM
VkResult result = wsi_create_sync_for_dma_buf_wait(chain, image,
VK_SYNC_FEATURE_CPU_WAIT,
&fence->temporary);
VkResult result = chain->image_info.explicit_sync ?
wsi_create_sync_for_image_syncobj(chain, image,
VK_SYNC_FEATURE_CPU_WAIT,
&fence->temporary) :
wsi_create_sync_for_dma_buf_wait(chain, image,
VK_SYNC_FEATURE_CPU_WAIT,
&fence->temporary);
if (result != VK_ERROR_FEATURE_NOT_PRESENT)
return result;
#endif
@ -1394,6 +1421,10 @@ wsi_common_queue_present(const struct wsi_device *wsi,
if (result != VK_SUCCESS)
goto fail_present;
VkTimelineSemaphoreSubmitInfo timeline_submit_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
};
VkSubmitInfo submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
};
@ -1446,34 +1477,50 @@ wsi_common_queue_present(const struct wsi_device *wsi,
VkFence fence = swapchain->fences[image_index];
struct wsi_memory_signal_submit_info mem_signal;
bool has_signal_dma_buf = false;
#ifdef HAVE_LIBDRM
result = wsi_prepare_signal_dma_buf_from_semaphore(swapchain, image);
if (result == VK_SUCCESS) {
bool explicit_sync = swapchain->image_info.explicit_sync;
if (explicit_sync) {
/* We will signal this acquire value ourselves when GPU work is done. */
image->explicit_sync[WSI_ES_ACQUIRE].timeline++;
/* The compositor will signal this value when it is done with the image. */
image->explicit_sync[WSI_ES_RELEASE].timeline++;
timeline_submit_info.signalSemaphoreValueCount = 1;
timeline_submit_info.pSignalSemaphoreValues = &image->explicit_sync[WSI_ES_ACQUIRE].timeline;
assert(submit_info.signalSemaphoreCount == 0);
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &swapchain->dma_buf_semaphore;
has_signal_dma_buf = true;
} else if (result == VK_ERROR_FEATURE_NOT_PRESENT) {
result = VK_SUCCESS;
has_signal_dma_buf = false;
submit_info.pSignalSemaphores = &image->explicit_sync[WSI_ES_ACQUIRE].semaphore;
__vk_append_struct(&submit_info, &timeline_submit_info);
} else {
goto fail_present;
}
#ifdef HAVE_LIBDRM
result = wsi_prepare_signal_dma_buf_from_semaphore(swapchain, image);
if (result == VK_SUCCESS) {
assert(submit_info.signalSemaphoreCount == 0);
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &swapchain->dma_buf_semaphore;
has_signal_dma_buf = true;
} else if (result == VK_ERROR_FEATURE_NOT_PRESENT) {
result = VK_SUCCESS;
has_signal_dma_buf = false;
} else {
goto fail_present;
}
#endif
struct wsi_memory_signal_submit_info mem_signal;
if (!has_signal_dma_buf) {
/* If we don't have dma-buf signaling, signal the memory object by
* chaining wsi_memory_signal_submit_info into VkSubmitInfo.
*/
result = VK_SUCCESS;
has_signal_dma_buf = false;
mem_signal = (struct wsi_memory_signal_submit_info) {
.sType = VK_STRUCTURE_TYPE_WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA,
.memory = image->memory,
};
__vk_append_struct(&submit_info, &mem_signal);
if (!has_signal_dma_buf) {
/* If we don't have dma-buf signaling, signal the memory object by
* chaining wsi_memory_signal_submit_info into VkSubmitInfo.
*/
result = VK_SUCCESS;
has_signal_dma_buf = false;
mem_signal = (struct wsi_memory_signal_submit_info) {
.sType = VK_STRUCTURE_TYPE_WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA,
.memory = image->memory,
};
__vk_append_struct(&submit_info, &mem_signal);
}
}
result = wsi->QueueSubmit(submit_queue, 1, &submit_info, fence);
@ -1485,15 +1532,17 @@ wsi_common_queue_present(const struct wsi_device *wsi,
image->acquired = false;
image->present_serial = ++swapchain->present_serial;
if (!explicit_sync) {
#ifdef HAVE_LIBDRM
if (has_signal_dma_buf) {
result = wsi_signal_dma_buf_from_semaphore(swapchain, image);
if (result != VK_SUCCESS)
goto fail_present;
}
if (has_signal_dma_buf) {
result = wsi_signal_dma_buf_from_semaphore(swapchain, image);
if (result != VK_SUCCESS)
goto fail_present;
}
#else
assert(!has_signal_dma_buf);
assert(!has_signal_dma_buf);
#endif
}
if (wsi->sw)
wsi->WaitForFences(device, 1, &swapchain->fences[image_index],

View file

@ -29,9 +29,11 @@
#include "util/xmlconfig.h"
#include "vk_device.h"
#include "vk_physical_device.h"
#include "vk_log.h"
#include "vk_util.h"
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/dma-buf.h"
#include "util/libsync.h"
#include <errno.h>
#include <time.h>
@ -229,6 +231,190 @@ fail_close_sync_file:
return result;
}
VkResult
wsi_create_image_explicit_sync_drm(const struct wsi_swapchain *chain,
struct wsi_image *image)
{
/* Cleanup of any failures is handled by the caller in wsi_create_image
* calling wsi_destroy_image -> wsi_destroy_image_explicit_sync_drm. */
VK_FROM_HANDLE(vk_device, device, chain->device);
const struct wsi_device *wsi = chain->wsi;
VkResult result = VK_SUCCESS;
int ret = 0;
const VkExportSemaphoreCreateInfo semaphore_export_info = {
.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
/* This is a syncobj fd for any drivers using syncobj. */
.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
};
const VkSemaphoreTypeCreateInfo semaphore_type_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
.pNext = &semaphore_export_info,
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
};
const VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = &semaphore_type_info,
};
for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
result = wsi->CreateSemaphore(chain->device,
&semaphore_info,
&chain->alloc,
&image->explicit_sync[i].semaphore);
if (result != VK_SUCCESS)
return result;
const VkSemaphoreGetFdInfoKHR semaphore_get_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
.semaphore = image->explicit_sync[i].semaphore,
.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
};
result = wsi->GetSemaphoreFdKHR(chain->device, &semaphore_get_info, &image->explicit_sync[i].fd);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
ret = drmSyncobjFDToHandle(device->drm_fd, image->explicit_sync[i].fd, &image->explicit_sync[i].handle);
if (ret != 0)
return VK_ERROR_FEATURE_NOT_PRESENT;
}
return VK_SUCCESS;
}
void
wsi_destroy_image_explicit_sync_drm(const struct wsi_swapchain *chain,
struct wsi_image *image)
{
VK_FROM_HANDLE(vk_device, device, chain->device);
const struct wsi_device *wsi = chain->wsi;
for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
if (image->explicit_sync[i].handle != 0) {
drmSyncobjDestroy(device->drm_fd, image->explicit_sync[i].handle);
image->explicit_sync[i].handle = 0;
}
if (image->explicit_sync[i].fd >= 0) {
close(image->explicit_sync[i].fd);
image->explicit_sync[i].fd = -1;
}
if (image->explicit_sync[i].semaphore != VK_NULL_HANDLE) {
wsi->DestroySemaphore(chain->device, image->explicit_sync[i].semaphore, &chain->alloc);
image->explicit_sync[i].semaphore = VK_NULL_HANDLE;
}
}
}
static VkResult
wsi_create_sync_imm(struct vk_device *device, struct vk_sync **sync_out)
{
   /* Create a shareable vk_sync that is signalled immediately, for the
    * case where an image has no pending release point to forward.
    *
    * Returns VK_SUCCESS and stores the new sync in *sync_out, or an
    * error code (in which case *sync_out is left untouched). */
   const struct vk_sync_type *sync_type =
      get_sync_file_sync_type(device, VK_SYNC_FEATURE_CPU_WAIT);
   struct vk_sync *sync = NULL;
   VkResult result;

   result = vk_sync_create(device, sync_type, VK_SYNC_IS_SHAREABLE, 0, &sync);
   if (result != VK_SUCCESS)
      goto error;

   result = vk_sync_signal(device, sync, 0);
   if (result != VK_SUCCESS)
      goto error;

   *sync_out = sync;
   goto done;

error:
   /* vk_sync_create may fail before assigning sync; don't hand a NULL
    * pointer to vk_sync_destroy. */
   if (sync)
      vk_sync_destroy(device, sync);
done:
   return result;
}
VkResult
wsi_create_sync_for_image_syncobj(const struct wsi_swapchain *chain,
                                  const struct wsi_image *image,
                                  enum vk_sync_features req_features,
                                  struct vk_sync **sync_out)
{
   /* Build a sync-file-backed vk_sync out of the image's explicit-sync
    * timelines: the current acquire and release points are each flattened
    * into a sync file via a temporary binary syncobj, merged, and imported
    * into a new vk_sync.
    *
    * NOTE(review): req_features is not referenced in this body — the sync
    * type is always looked up with VK_SYNC_FEATURE_CPU_WAIT; confirm that
    * is intentional. */
   VK_FROM_HANDLE(vk_device, device, chain->device);
   const struct vk_sync_type *sync_type =
      get_sync_file_sync_type(device, VK_SYNC_FEATURE_CPU_WAIT);
   VkResult result = VK_SUCCESS;
   struct vk_sync *sync = NULL;
   /* One sync file per timeline (acquire, release) plus the merged fd;
    * all initialized to "not created" so the shared cleanup below is safe
    * from any failure point. */
   int sync_file_fds[WSI_ES_COUNT] = { -1, -1 };
   uint32_t tmp_handles[WSI_ES_COUNT] = { 0, 0 };
   int merged_sync_fd = -1;

   if (sync_type == NULL)
      return VK_ERROR_FEATURE_NOT_PRESENT;

   if (image->explicit_sync[WSI_ES_RELEASE].timeline == 0) {
      /* Signal immediately, there is no release to forward. */
      return wsi_create_sync_imm(device, sync_out);
   }

   /* Transfer over to a new sync file with a
    * surrogate handle.
    */
   for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
      if (drmSyncobjCreate(device->drm_fd, 0, &tmp_handles[i])) {
         result = vk_errorf(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY, "Failed to create temp syncobj. Errno: %d - %s", errno, strerror(errno));
         goto fail;
      }

      /* Copy timeline point `timeline` of the image's syncobj onto binary
       * point 0 of the temporary syncobj. */
      if (drmSyncobjTransfer(device->drm_fd, tmp_handles[i], 0,
                             image->explicit_sync[i].handle, image->explicit_sync[i].timeline, 0)) {
         result = vk_errorf(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY, "Failed to transfer syncobj. Was the timeline point materialized? Errno: %d - %s", errno, strerror(errno));
         goto fail;
      }

      if (drmSyncobjExportSyncFile(device->drm_fd, tmp_handles[i], &sync_file_fds[i])) {
         result = vk_errorf(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY, "Failed to export sync file. Errno: %d - %s", errno, strerror(errno));
         goto fail;
      }
   }

   /* Merge acquire + release into a single fence that signals only once
    * both underlying points have signalled. */
   merged_sync_fd = sync_merge("acquire merged sync", sync_file_fds[WSI_ES_ACQUIRE], sync_file_fds[WSI_ES_RELEASE]);
   if (merged_sync_fd < 0) {
      result = vk_errorf(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY, "Failed to merge acquire + release sync timelines. Errno: %d - %s", errno, strerror(errno));
      goto fail;
   }

   result = vk_sync_create(device, sync_type, VK_SYNC_IS_SHAREABLE, 0, &sync);
   if (result != VK_SUCCESS)
      goto fail;

   result = vk_sync_import_sync_file(device, sync, merged_sync_fd);
   if (result != VK_SUCCESS)
      goto fail;

   *sync_out = sync;
   goto done;

fail:
   if (sync)
      vk_sync_destroy(device, sync);
done:
   /* The temporaries are released on both success and failure paths. */
   for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
      if (tmp_handles[i])
         drmSyncobjDestroy(device->drm_fd, tmp_handles[i]);
   }
   for (uint32_t i = 0; i < WSI_ES_COUNT; i++) {
      if (sync_file_fds[i] >= 0)
         close(sync_file_fds[i]);
   }
   if (merged_sync_fd >= 0)
      close(merged_sync_fd);

   return result;
}
bool
wsi_common_drm_devices_equal(int fd_a, int fd_b)
{
@ -647,3 +833,163 @@ wsi_drm_configure_image(const struct wsi_swapchain *chain,
info);
}
}
/* Per-image state bits describing how "done" the explicit-sync timelines
 * are, as computed by wsi_drm_image_explicit_sync_state(). A release point
 * is "materialized" when a fence for it exists (WAIT_AVAILABLE succeeds)
 * even if it has not signalled yet. */
enum wsi_explicit_sync_state_flags
{
   WSI_ES_STATE_RELEASE_MATERIALIZED = (1u << 0),
   WSI_ES_STATE_RELEASE_SIGNALLED = (1u << 1),
   WSI_ES_STATE_ACQUIRE_SIGNALLED = (1u << 2),
};

/* Levels of "freeness"
 * 0 -> Acquire Signalled + Release Signalled
 * 1 -> Acquire Signalled + Release Materialized
 * 2 -> Release Signalled
 * 3 -> Release Materialized
 */
static const uint32_t wsi_explicit_sync_free_levels[] = {
   (WSI_ES_STATE_RELEASE_SIGNALLED | WSI_ES_STATE_RELEASE_MATERIALIZED | WSI_ES_STATE_ACQUIRE_SIGNALLED),
   (WSI_ES_STATE_RELEASE_MATERIALIZED | WSI_ES_STATE_ACQUIRE_SIGNALLED),
   (WSI_ES_STATE_RELEASE_MATERIALIZED | WSI_ES_STATE_RELEASE_SIGNALLED),
   (WSI_ES_STATE_RELEASE_MATERIALIZED),
};
/* Query the current signalled/materialized state of an image's acquire
 * and release timelines, returned as a mask of WSI_ES_STATE_* flags.
 * Returns 0 (treated as "not free at any level") if the syncobj query
 * fails. */
static uint32_t
wsi_drm_image_explicit_sync_state(struct vk_device *device, struct wsi_image *image)
{
   if (image->explicit_sync[WSI_ES_RELEASE].timeline == 0) {
      /* This image has never been used in a timeline.
       * It must be free.
       */
      return WSI_ES_STATE_RELEASE_SIGNALLED | WSI_ES_STATE_RELEASE_MATERIALIZED | WSI_ES_STATE_ACQUIRE_SIGNALLED;
   }

   uint64_t points[WSI_ES_COUNT] = { 0 };
   uint32_t handles[WSI_ES_COUNT] = {
      image->explicit_sync[WSI_ES_ACQUIRE].handle,
      image->explicit_sync[WSI_ES_RELEASE].handle
   };

   /* Read the last-signalled point of both timelines in one ioctl. */
   int ret = drmSyncobjQuery(device->drm_fd, handles, points, WSI_ES_COUNT);
   if (ret)
      return 0;

   uint32_t flags = 0;
   if (points[WSI_ES_ACQUIRE] >= image->explicit_sync[WSI_ES_ACQUIRE].timeline) {
      flags |= WSI_ES_STATE_ACQUIRE_SIGNALLED;
   }

   if (points[WSI_ES_RELEASE] >= image->explicit_sync[WSI_ES_RELEASE].timeline) {
      /* A signalled point is by definition also materialized. */
      flags |= WSI_ES_STATE_RELEASE_SIGNALLED | WSI_ES_STATE_RELEASE_MATERIALIZED;
   } else {
      /* Not signalled yet: a zero-timeout WAIT_AVAILABLE wait tells us
       * whether a fence for the release point at least exists, without
       * waiting for it to signal. */
      uint32_t first_signalled;
      ret = drmSyncobjTimelineWait(device->drm_fd, &handles[WSI_ES_RELEASE], &image->explicit_sync[WSI_ES_RELEASE].timeline, 1, 0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE, &first_signalled);
      if (ret == 0)
         flags |= WSI_ES_STATE_RELEASE_MATERIALIZED;
   }

   return flags;
}
/* Convert a relative timeout in nanoseconds into the absolute deadline
 * the syncobj wait ioctls expect, clamping so the (signed) kernel
 * timeout value cannot overflow. */
static uint64_t
wsi_drm_rel_timeout_to_abs(uint64_t rel_timeout_ns)
{
   const uint64_t now_ns = os_time_get_nano();

   /* Syncobj timeouts are signed */
   if (rel_timeout_ns > INT64_MAX - now_ns)
      return INT64_MAX;

   return now_ns + rel_timeout_ns;
}
/* Pick the best unacquired image for the next acquire, or wait up to
 * rel_timeout_ns for any image's release point to materialize.
 *
 * On success, writes the chosen image's index into *image_index and
 * returns VK_SUCCESS. Returns VK_TIMEOUT (or VK_NOT_READY for a zero
 * timeout) when nothing became available, and VK_ERROR_OUT_OF_DATE_KHR
 * for any other syncobj wait failure. */
VkResult
wsi_drm_wait_for_explicit_sync_release(struct wsi_swapchain *chain,
                                       uint32_t image_count,
                                       struct wsi_image **images,
                                       uint64_t rel_timeout_ns,
                                       uint32_t *image_index)
{
#ifdef HAVE_LIBDRM
   STACK_ARRAY(uint32_t, handles, image_count);
   STACK_ARRAY(uint64_t, points, image_count);
   STACK_ARRAY(uint32_t, indices, image_count);
   STACK_ARRAY(uint32_t, flags, image_count);
   VK_FROM_HANDLE(vk_device, device, chain->device);
   int ret = 0;

   /* We don't need to wait for the merged timeline on the CPU,
    * only on the GPU side of things.
    *
    * We already know that the CPU side for the acquire has materialized,
    * for all images in this array.
    * That's what "busy"/"free" essentially represents.
    */

   /* Gather release-timeline state for every image the application has
    * not currently acquired. `indices` maps the compacted arrays back to
    * positions in `images`. */
   uint32_t unacquired_image_count = 0;
   for (uint32_t i = 0; i < image_count; i++) {
      if (images[i]->acquired)
         continue;

      flags[unacquired_image_count] = wsi_drm_image_explicit_sync_state(device, images[i]);
      handles[unacquired_image_count] = images[i]->explicit_sync[WSI_ES_RELEASE].handle;
      points[unacquired_image_count] = images[i]->explicit_sync[WSI_ES_RELEASE].timeline;
      indices[unacquired_image_count] = i;
      unacquired_image_count++;
   }

   /* Handle the case where there are no images to possible acquire. */
   if (!unacquired_image_count) {
      ret = -ETIME;
      goto done;
   }

   /* Find the most optimal image using the free levels above. */
   for (uint32_t free_level_idx = 0; free_level_idx < ARRAY_SIZE(wsi_explicit_sync_free_levels); free_level_idx++) {
      uint32_t free_level = wsi_explicit_sync_free_levels[free_level_idx];
      uint64_t present_serial = UINT64_MAX;
      for (uint32_t i = 0; i < unacquired_image_count; i++) {
         /* Pick the image that was presented longest ago inside
          * of this free level, so it has the highest chance of
          * being totally free the soonest.
          */
         if ((flags[i] & free_level) == free_level &&
             images[indices[i]]->present_serial < present_serial) {
            *image_index = indices[i];
            present_serial = images[indices[i]]->present_serial;
         }
      }
      /* present_serial moved off UINT64_MAX iff some image matched this
       * free level, in which case *image_index is already set. */
      if (present_serial != UINT64_MAX)
         goto done;
   }

   /* Use DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE so we do not need to wait for the
    * compositor's GPU work to be finished to acquire on the CPU side.
    *
    * We will forward the GPU signal to the VkSemaphore/VkFence of the acquire.
    */
   uint32_t first_signalled;
   ret = drmSyncobjTimelineWait(device->drm_fd, handles, points, unacquired_image_count,
                                wsi_drm_rel_timeout_to_abs(rel_timeout_ns),
                                DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE,
                                &first_signalled);

   /* Return the first image that materialized. */
   if (ret != 0)
      goto done;

   *image_index = indices[first_signalled];

done:
   STACK_ARRAY_FINISH(flags);
   STACK_ARRAY_FINISH(indices);
   STACK_ARRAY_FINISH(points);
   STACK_ARRAY_FINISH(handles);

   if (ret == 0)
      return VK_SUCCESS;
   else if (ret == -ETIME)
      return rel_timeout_ns ? VK_TIMEOUT : VK_NOT_READY;
   else
      return VK_ERROR_OUT_OF_DATE_KHR;
#else
   return VK_ERROR_FEATURE_NOT_PRESENT;
#endif
}

View file

@ -114,6 +114,21 @@ struct wsi_image_info {
struct wsi_image *image);
};
/* Indices into wsi_image::explicit_sync for the two per-image timelines:
 * the acquire timeline is signalled by the client when its GPU work is
 * done, the release timeline by the compositor when it is done with the
 * image. */
enum wsi_explicit_sync_timelines
{
   WSI_ES_ACQUIRE,
   WSI_ES_RELEASE,
   WSI_ES_COUNT,
};

/* One explicit-sync timeline, held simultaneously as a Vulkan timeline
 * semaphore, an exported opaque fd, and a DRM syncobj handle.
 * `timeline` is the most recently used point on it (0 = never used). */
struct wsi_image_explicit_sync_timeline {
   VkSemaphore semaphore;
   uint64_t timeline;
   int fd;
   uint32_t handle;
};
enum wsi_swapchain_blit_type {
WSI_SWAPCHAIN_NO_BLIT,
WSI_SWAPCHAIN_BUFFER_BLIT,
@ -136,6 +151,8 @@ struct wsi_image {
bool acquired;
uint64_t present_serial;
struct wsi_image_explicit_sync_timeline explicit_sync[WSI_ES_COUNT];
#ifndef _WIN32
uint64_t drm_modifier;
#endif
@ -327,6 +344,26 @@ wsi_create_sync_for_dma_buf_wait(const struct wsi_swapchain *chain,
const struct wsi_image *image,
enum vk_sync_features sync_features,
struct vk_sync **sync_out);
/* Create a vk_sync that waits on the image's current explicit-sync
 * acquire + release timeline points. */
VkResult
wsi_create_sync_for_image_syncobj(const struct wsi_swapchain *chain,
                                  const struct wsi_image *image,
                                  enum vk_sync_features req_features,
                                  struct vk_sync **sync_out);

/* Create / destroy the per-image explicit-sync semaphores, exported fds
 * and DRM syncobj handles. */
VkResult
wsi_create_image_explicit_sync_drm(const struct wsi_swapchain *chain,
                                   struct wsi_image *image);
void
wsi_destroy_image_explicit_sync_drm(const struct wsi_swapchain *chain,
                                    struct wsi_image *image);

/* Select (or wait up to rel_timeout_ns for) a releasable image among
 * `images`, writing its index to *image_index. */
VkResult
wsi_drm_wait_for_explicit_sync_release(struct wsi_swapchain *chain,
                                       uint32_t image_count,
                                       struct wsi_image **images,
                                       uint64_t rel_timeout_ns,
                                       uint32_t *image_index);
#endif
struct wsi_interface {