From 665d30b5448f606d7a79afe0596c3a2264ab3e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 6 Feb 2024 10:27:54 -0800 Subject: [PATCH] iris: Wait for drm_xe_exec_queue to be idle before destroying it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe KMD don't refcount anything, so resources could be freed while they are still in use if we don't wait for exec_queue to be idle. This issue was found with Xe KMD error capture, VM was already destroyed when it attempted to capture error state but it can also happen in applications that did not hang. This fixed the '*ERROR* GT0: TLB invalidation' errors when running piglit all test list. Signed-off-by: José Roberto de Souza Reviewed-by: Lionel Landwerlin Part-of: --- src/gallium/drivers/iris/iris_batch.c | 6 +-- src/gallium/drivers/iris/iris_batch.h | 3 ++ src/gallium/drivers/iris/xe/iris_batch.c | 51 +++++++++++++++++++++++- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index 1505fe37f79..2d61048282c 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -862,8 +862,8 @@ iris_batch_name_to_string(enum iris_batch_name name) return names[name]; } -static inline bool -context_or_exec_queue_was_banned(struct iris_bufmgr *bufmgr, int ret) +bool +iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret) { enum intel_kmd_type kmd_type = iris_bufmgr_get_device_info(bufmgr)->kmd_type; @@ -960,7 +960,7 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line) * has been lost and needs to be re-initialized. If this succeeds, * dubiously claim success... 
*/ - if (ret && context_or_exec_queue_was_banned(bufmgr, ret)) { + if (ret && iris_batch_is_banned(bufmgr, ret)) { enum pipe_reset_status status = iris_batch_check_for_reset(batch); if (status != PIPE_NO_RESET || ice->context_reset_signaled) diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index 341a3c9fe5e..f0cfe4fb031 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -446,6 +446,9 @@ iris_batch_mark_reset_sync(struct iris_batch *batch) const char * iris_batch_name_to_string(enum iris_batch_name name); +bool +iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret); + #define iris_foreach_batch(ice, batch) \ for (struct iris_batch *batch = &ice->batches[0]; \ batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \ diff --git a/src/gallium/drivers/iris/xe/iris_batch.c b/src/gallium/drivers/iris/xe/iris_batch.c index 0c0fc208cb9..7e09d352773 100644 --- a/src/gallium/drivers/iris/xe/iris_batch.c +++ b/src/gallium/drivers/iris/xe/iris_batch.c @@ -151,7 +151,45 @@ void iris_xe_init_batches(struct iris_context *ice) free(engines_info); } -void iris_xe_destroy_batch(struct iris_batch *batch) +/* + * Wait for all previous DRM_IOCTL_XE_EXEC calls over the + * drm_xe_exec_queue in this iris_batch to complete. 
+ **/ +static void +iris_xe_wait_exec_queue_idle(struct iris_batch *batch) +{ + struct iris_bufmgr *bufmgr = batch->screen->bufmgr; + struct iris_syncobj *syncobj = iris_create_syncobj(bufmgr); + struct drm_xe_sync xe_sync = { + .type = DRM_XE_SYNC_TYPE_SYNCOBJ, + .flags = DRM_XE_SYNC_FLAG_SIGNAL, + }; + struct drm_xe_exec exec = { + .exec_queue_id = batch->xe.exec_queue_id, + .num_syncs = 1, + .syncs = (uintptr_t)&xe_sync, + }; + int ret; + + if (!syncobj) + return; + + xe_sync.handle = syncobj->handle; + /* Using the special exec.num_batch_buffer == 0 handling to get syncobj + * signaled when the last DRM_IOCTL_XE_EXEC is completed. + */ + ret = intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_XE_EXEC, &exec); + if (ret == 0) { + assert(iris_wait_syncobj(bufmgr, syncobj, INT64_MAX)); + } else { + assert(iris_batch_is_banned(bufmgr, errno) == true); + } + + iris_syncobj_destroy(bufmgr, syncobj); +} + +static void +iris_xe_destroy_exec_queue(struct iris_batch *batch) { struct iris_screen *screen = batch->screen; struct iris_bufmgr *bufmgr = screen->bufmgr; @@ -165,6 +203,15 @@ void iris_xe_destroy_batch(struct iris_batch *batch) assert(ret == 0); } +void iris_xe_destroy_batch(struct iris_batch *batch) +{ + /* Xe KMD don't refcount anything, so resources could be freed while they + * are still in use if we don't wait for exec_queue to be idle. + */ + iris_xe_wait_exec_queue_idle(batch); + iris_xe_destroy_exec_queue(batch); +} + bool iris_xe_replace_batch(struct iris_batch *batch) { enum intel_engine_class engine_classes[IRIS_BATCH_COUNT]; @@ -184,7 +231,7 @@ bool iris_xe_replace_batch(struct iris_batch *batch) ret = iris_xe_init_batch(bufmgr, engines_info, engine_classes[batch->name], ice->priority, &new_exec_queue_id); if (ret) { - iris_xe_destroy_batch(batch); + iris_xe_destroy_exec_queue(batch); batch->xe.exec_queue_id = new_exec_queue_id; iris_lost_context_state(batch); }