anv: Switch to truly asynchronous VM binding in Xe KMD

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26805>
This commit is contained in:
José Roberto de Souza 2023-12-22 13:13:25 -08:00 committed by Marge Bot
parent 63abbb24ad
commit 3579413aed
2 changed files with 35 additions and 30 deletions

View file

@ -25,6 +25,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "common/intel_bind_timeline.h"
#include "drm-uapi/xe_drm.h"
@ -40,22 +41,27 @@ xe_execute_simple_batch(struct anv_queue *queue,
queue->exec_queue_id;
struct drm_syncobj_create syncobj_create = {};
struct drm_syncobj_destroy syncobj_destroy = {};
struct drm_xe_sync syncs[2] = {};
VkResult result = VK_SUCCESS;
if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create))
return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create sync obj");
struct drm_xe_sync sync = {
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
.handle = syncobj_create.handle,
};
syncs[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
syncs[0].flags = DRM_XE_SYNC_FLAG_SIGNAL;
syncs[0].handle = syncobj_create.handle;
/* vm bind sync */
syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
syncs[1].handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
syncs[1].timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);
struct drm_xe_exec exec = {
.exec_queue_id = exec_queue_id,
.num_batch_buffer = 1,
.address = batch_bo->offset,
.num_syncs = 1,
.syncs = (uintptr_t)&sync,
.num_syncs = ARRAY_SIZE(syncs),
.syncs = (uintptr_t)syncs,
};
if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) {
@ -121,37 +127,35 @@ xe_exec_process_syncs(struct anv_queue *queue,
util_dynarray_num_elements(&utrace_submit->batch_bos, struct anv_bo *) == 0;
const uint32_t num_syncs = wait_count + signal_count + extra_sync_count +
(has_utrace_sync ? 1 : 0) +
((queue->sync && !is_companion_rcs_queue) ? 1 : 0);
if (!num_syncs)
return VK_SUCCESS;
((queue->sync && !is_companion_rcs_queue) ? 1 : 0) +
1 /* vm bind sync */;
struct drm_xe_sync *xe_syncs = vk_zalloc(&device->vk.alloc,
sizeof(*xe_syncs) * num_syncs, 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
struct drm_xe_sync *xe_sync;
if (!xe_syncs)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
uint32_t count = 0;
if (has_utrace_sync) {
struct drm_xe_sync *xe_sync = &xe_syncs[count++];
xe_sync = &xe_syncs[count++];
xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
}
for (uint32_t i = 0; i < wait_count; i++) {
struct drm_xe_sync *xe_sync = &xe_syncs[count++];
const struct vk_sync_wait *vk_wait = &waits[i];
xe_sync = &xe_syncs[count++];
xe_exec_fill_sync(xe_sync, vk_wait->sync, vk_wait->wait_value,
TYPE_WAIT);
}
for (uint32_t i = 0; i < signal_count; i++) {
struct drm_xe_sync *xe_sync = &xe_syncs[count++];
const struct vk_sync_signal *vk_signal = &signals[i];
xe_sync = &xe_syncs[count++];
xe_exec_fill_sync(xe_sync, vk_signal->sync, vk_signal->signal_value,
TYPE_SIGNAL);
}
@ -160,12 +164,17 @@ xe_exec_process_syncs(struct anv_queue *queue,
xe_syncs[count++] = extra_syncs[i];
if (queue->sync && !is_companion_rcs_queue) {
struct drm_xe_sync *xe_sync = &xe_syncs[count++];
xe_sync = &xe_syncs[count++];
xe_exec_fill_sync(xe_sync, queue->sync, 0,
TYPE_SIGNAL);
}
/* vm bind sync */
xe_sync = &xe_syncs[count++];
xe_sync->handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
xe_sync->type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
xe_sync->timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);
assert(count == num_syncs);
*ret = xe_syncs;
*ret_count = num_syncs;
@ -240,9 +249,13 @@ xe_queue_exec_utrace_locked(struct anv_queue *queue,
struct anv_utrace_submit *utrace_submit)
{
struct anv_device *device = queue->device;
struct drm_xe_sync xe_sync = {};
struct drm_xe_sync xe_syncs[2] = {};
xe_exec_fill_sync(&xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
xe_exec_fill_sync(&xe_syncs[0], utrace_submit->sync, 0, TYPE_SIGNAL);
xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
xe_syncs[1].handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_flush &&
@ -257,8 +270,8 @@ xe_queue_exec_utrace_locked(struct anv_queue *queue,
struct drm_xe_exec exec = {
.exec_queue_id = queue->exec_queue_id,
.num_batch_buffer = 1,
.syncs = (uintptr_t)&xe_sync,
.num_syncs = 1,
.syncs = (uintptr_t)xe_syncs,
.num_syncs = ARRAY_SIZE(xe_syncs),
.address = batch_bo->offset,
};
if (likely(!device->info->no_hw)) {

View file

@ -127,11 +127,6 @@ xe_vm_bind_op(struct anv_device *device,
.num_syncs = 1,
.syncs = (uintptr_t)&xe_sync,
};
struct drm_syncobj_timeline_wait syncobj_wait = {
.timeout_nsec = INT64_MAX,
.handles = (uintptr_t)&xe_sync.handle,
.count_handles = 1,
};
int ret;
STACK_ARRAY(struct drm_xe_vm_bind_op, xe_binds_stackarray,
@ -192,9 +187,6 @@ xe_vm_bind_op(struct anv_device *device,
ANV_RMV(vm_binds, device, submit->binds, submit->binds_len);
syncobj_wait.points = (uintptr_t)&xe_sync.timeline_value;
ret = intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &syncobj_wait);
out_stackarray:
STACK_ARRAY_FINISH(xe_binds_stackarray);