tu: Use the common syncobj sync type for the layered timelines.

Everyone else doing emulated timelines is doing it on top of their base
binary type, and that appears to have been the intent of the shared
emulation layer.
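
For context, the pattern being adopted looks roughly like the sketch below. This is a minimal sketch assuming Mesa's vk_drm_syncobj / vk_sync_timeline runtime helpers and the tu_device fields visible in the diff; setup_sync_types is a hypothetical helper name for illustration, not the driver's actual function:

/* Hand the base drm syncobj type straight to vk_sync_timeline, which
 * layers timeline-point emulation on top of it; no driver-specific
 * vk_sync type is needed. (Hypothetical helper, for illustration.)
 */
#include "vk_drm_syncobj.h"
#include "vk_sync_timeline.h"

static void
setup_sync_types(struct tu_device *device, int fd)
{
   /* Base binary drm syncobj vk_sync type for this fd. */
   device->syncobj_type = vk_drm_syncobj_get_type(fd);

   /* If the kernel lacks timeline syncobjs, emulate timelines on top of
    * the same base binary type.
    */
   if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
      device->timeline_type = vk_sync_timeline_get_type(&device->syncobj_type);

   device->sync_types[0] = &device->syncobj_type;
   device->sync_types[1] = &device->timeline_type.sync;
   device->sync_types[2] = NULL; /* assumed NULL terminator */
}

vk_sync_timeline then tracks the per-point binary payloads internally, so the driver's own tu_timeline_sync state machine (RESET/SUBMITTED/SIGNALED) and its CPU-wait path become dead code; the deletions below remove them.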

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36563>
Emma Anholt 2025-08-04 12:30:46 -07:00 committed by Marge Bot
parent 9e61e72e9f
commit 00336bc9c4
5 changed files with 6 additions and 307 deletions

@@ -25,22 +25,6 @@ enum tu_bo_alloc_flags {
TU_BO_ALLOC_SHAREABLE = 1 << 5,
};
/* Define the tu_timeline_sync type based on a drm syncobj, used as the
 * point type for vk_sync_timeline. The handling logic is mostly copied
 * from anv_bo_sync, since it can be used in a similar way here.
 */
enum tu_timeline_sync_state {
/** Indicates that this is a new (or newly reset) fence */
TU_TIMELINE_SYNC_STATE_RESET,
/** Indicates that this fence has been submitted to the GPU but is still
* (as far as we know) in use by the GPU.
*/
TU_TIMELINE_SYNC_STATE_SUBMITTED,
TU_TIMELINE_SYNC_STATE_SIGNALED,
};
enum tu_mem_sync_op {
TU_MEM_SYNC_CACHE_TO_GPU,
TU_MEM_SYNC_CACHE_FROM_GPU,
@@ -129,13 +113,6 @@ struct tu_zombie_vma {
uint64_t size;
};
struct tu_timeline_sync {
struct vk_sync base;
enum tu_timeline_sync_state state;
uint32_t syncobj;
};
VkResult
tu_bo_init_new_explicit_iova(struct tu_device *dev,
struct vk_object_base *base,

@@ -175,206 +175,3 @@ msm_submit_add_entries(struct tu_device *device, void *_submit,
bos[i] = entries[i].bo;
}
}
uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
uint32_t syncobj = -1;
if (vk_sync_is_tu_timeline_sync(sync)) {
syncobj = to_tu_timeline_sync(sync)->syncobj;
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
}
assert(syncobj != -1);
return syncobj;
}
static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
struct vk_sync *vk_sync,
uint64_t initial_value)
{
struct tu_device *device = container_of(vk_device, struct tu_device, vk);
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
uint32_t flags = 0;
int err = vk_device->sync->create(vk_device->sync, flags, &sync->syncobj);
if (err < 0) {
return vk_error(device, VK_ERROR_DEVICE_LOST);
}
sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
TU_TIMELINE_SYNC_STATE_RESET;
return VK_SUCCESS;
}
static void
tu_timeline_sync_finish(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
ASSERTED int err = vk_device->sync->destroy(vk_device->sync, sync->syncobj);
assert(err == 0);
}
static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
struct vk_sync *vk_sync)
{
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
int err = vk_device->sync->reset(vk_device->sync, &sync->syncobj, 1);
if (err) {
return vk_errorf(dev, VK_ERROR_UNKNOWN,
"DRM_IOCTL_SYNCOBJ_RESET failed: %m");
} else {
sync->state = TU_TIMELINE_SYNC_STATE_RESET;
}
return VK_SUCCESS;
}
static VkResult
drm_syncobj_wait(struct tu_device *device,
uint32_t *handles, uint32_t count_handles,
uint64_t timeout_nsec, bool wait_all)
{
MESA_TRACE_FUNC();
struct util_sync_provider *sync = device->vk.sync;
uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
/* syncobj absolute timeouts are signed. clamp OS_TIMEOUT_INFINITE down. */
timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX);
int err = sync->wait(sync, handles,
count_handles, timeout_nsec,
syncobj_wait_flags,
NULL /* first_signaled */);
if (err && errno == ETIME) {
return VK_TIMEOUT;
} else if (err) {
return vk_errorf(device, VK_ERROR_UNKNOWN,
"DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
}
return VK_SUCCESS;
}
/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
uint32_t wait_count,
const struct vk_sync_wait *waits,
enum vk_sync_wait_flags wait_flags,
uint64_t abs_timeout_ns)
{
MESA_TRACE_FUNC();
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);
uint32_t handles[wait_count];
uint32_t submit_count;
VkResult ret = VK_SUCCESS;
uint32_t pending = wait_count;
struct tu_timeline_sync *submitted_syncs[wait_count];
while (pending) {
pending = 0;
submit_count = 0;
for (unsigned i = 0; i < wait_count; ++i) {
struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
pending++;
} else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
if (wait_flags & VK_SYNC_WAIT_ANY)
return VK_SUCCESS;
} else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
handles[submit_count] = sync->syncobj;
submitted_syncs[submit_count++] = sync;
}
}
}
if (submit_count > 0) {
do {
ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
} while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);
if (ret == VK_SUCCESS) {
for (unsigned i = 0; i < submit_count; ++i) {
struct tu_timeline_sync *sync = submitted_syncs[i];
sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
}
} else {
/* Return the error, which also covers the timeout case. */
return ret;
}
} else if (pending > 0) {
/* If we've hit this then someone decided to vkWaitForFences before
* they've actually submitted any of them to a queue. This is a
* fairly pessimal case, so it's ok to lock here and use a standard
* pthreads condition variable.
*/
pthread_mutex_lock(&dev->submit_mutex);
/* It's possible that some of the fences have changed state since the
* last time we checked. Now that we have the lock, check for
* pending fences again and don't wait if it's changed.
*/
uint32_t now_pending = 0;
for (uint32_t i = 0; i < wait_count; i++) {
struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
now_pending++;
}
assert(now_pending <= pending);
if (now_pending == pending) {
struct timespec abstime = {
.tv_sec = abs_timeout_ns / NSEC_PER_SEC,
.tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
};
ASSERTED int ret;
ret = pthread_cond_timedwait(&dev->timeline_cond,
&dev->submit_mutex, &abstime);
assert(ret != EINVAL);
if (os_time_get_nano() >= abs_timeout_ns) {
pthread_mutex_unlock(&dev->submit_mutex);
return VK_TIMEOUT;
}
}
pthread_mutex_unlock(&dev->submit_mutex);
}
}
return ret;
}
/* Emulated timeline support on top of binary sync drm syncobjs, see
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14105
*/
const struct vk_sync_type tu_timeline_sync_type = {
.size = sizeof(struct tu_timeline_sync),
.features = (enum vk_sync_features)(
VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT |
VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT |
VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY |
VK_SYNC_FEATURE_WAIT_PENDING),
.init = tu_timeline_sync_init,
.finish = tu_timeline_sync_finish,
.reset = tu_timeline_sync_reset,
.wait_many = tu_timeline_sync_wait,
};

@@ -49,21 +49,4 @@ fence_before(uint32_t a, uint32_t b)
return (int32_t)(a - b) < 0;
}
extern const struct vk_sync_type tu_timeline_sync_type;
static inline bool
vk_sync_is_tu_timeline_sync(const struct vk_sync *sync)
{
return sync->type == &tu_timeline_sync_type;
}
static inline struct tu_timeline_sync *
to_tu_timeline_sync(struct vk_sync *sync)
{
assert(sync->type == &tu_timeline_sync_type);
return container_of(sync, struct tu_timeline_sync, base);
}
uint32_t tu_syncobj_from_vk_sync(struct vk_sync *sync);
#endif

@@ -895,7 +895,7 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
struct vk_sync *sync = waits[i].sync;
in_syncobjs[i] = (struct drm_msm_gem_submit_syncobj) {
- .handle = tu_syncobj_from_vk_sync(sync),
+ .handle = vk_sync_as_drm_syncobj(sync)->syncobj,
.flags = 0,
.point = waits[i].wait_value,
};
@@ -905,7 +905,7 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
struct vk_sync *sync = signals[i].sync;
out_syncobjs[i] = (struct drm_msm_gem_submit_syncobj) {
- .handle = tu_syncobj_from_vk_sync(sync),
+ .handle = vk_sync_as_drm_syncobj(sync)->syncobj,
.flags = 0,
.point = signals[i].signal_value,
};
@@ -976,35 +976,6 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
u_trace_submission_data->gpu_ts_offset = gpu_offset;
}
for (uint32_t i = 0; i < wait_count; i++) {
if (!vk_sync_is_tu_timeline_sync(waits[i].sync))
continue;
struct tu_timeline_sync *sync =
container_of(waits[i].sync, struct tu_timeline_sync, base);
assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
/* Set the state of the wait timeline sync to SIGNALED: the syncobj is
 * done and ready again, so it can be garbage-collected later.
 */
sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
}
for (uint32_t i = 0; i < signal_count; i++) {
if (!vk_sync_is_tu_timeline_sync(signals[i].sync))
continue;
struct tu_timeline_sync *sync =
container_of(signals[i].sync, struct tu_timeline_sync, base);
assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
/* Set the state of the signal timeline sync to SUBMITTED so that we can
 * wait on this timeline sync for completion if necessary.
 */
sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
}
fail_submit:
vk_free(&queue->device->vk.alloc, out_syncobjs);
fail_out_syncobjs:
@@ -1123,7 +1094,7 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
device->syncobj_type = vk_drm_syncobj_get_type(fd);
/* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
- device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
+ device->timeline_type = vk_sync_timeline_get_type(&device->syncobj_type);
device->sync_types[0] = &device->syncobj_type;
device->sync_types[1] = &device->timeline_type.sync;

@@ -999,7 +999,7 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
struct vk_sync *sync = waits[i].sync;
in_syncobjs[i] = (struct drm_virtgpu_execbuffer_syncobj) {
- .handle = tu_syncobj_from_vk_sync(sync),
+ .handle = vk_sync_as_drm_syncobj(sync)->syncobj,
.flags = 0,
.point = waits[i].wait_value,
};
@@ -1009,7 +1009,7 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
struct vk_sync *sync = signals[i].sync;
out_syncobjs[i] = (struct drm_virtgpu_execbuffer_syncobj) {
- .handle = tu_syncobj_from_vk_sync(sync),
+ .handle = vk_sync_as_drm_syncobj(sync)->syncobj,
.flags = 0,
.point = signals[i].signal_value,
};
@@ -1091,35 +1091,6 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
u_trace_submission_data->gpu_ts_offset = gpu_offset;
}
for (uint32_t i = 0; i < wait_count; i++) {
if (!vk_sync_is_tu_timeline_sync(waits[i].sync))
continue;
struct tu_timeline_sync *sync =
container_of(waits[i].sync, struct tu_timeline_sync, base);
assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
/* Set the state of the wait timeline sync to SIGNALED: the syncobj is
 * done and ready again, so it can be garbage-collected later.
 */
sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
}
for (uint32_t i = 0; i < signal_count; i++) {
if (!vk_sync_is_tu_timeline_sync(signals[i].sync))
continue;
struct tu_timeline_sync *sync =
container_of(signals[i].sync, struct tu_timeline_sync, base);
assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
/* Set the state of the signal timeline sync to SUBMITTED so that we can
 * wait on this timeline sync for completion if necessary.
 */
sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
}
fail_submit:
vk_free(&queue->device->vk.alloc, req);
fail_alloc_req:
@@ -1295,7 +1266,7 @@ tu_knl_drm_virtio_load(struct tu_instance *instance,
/* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
- device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
+ device->timeline_type = vk_sync_timeline_get_type(&device->syncobj_type);
device->sync_types[0] = &device->syncobj_type;
device->sync_types[1] = &device->timeline_type.sync;