iris: track reset signalling instead of replacing the context

Instead of creating a new context when a reset is encountered, we now
track whether the reset was reported back to the application. According
to the spec, the application should poll the reset status and only
recreate the context once NO_ERROR is encountered after a *RESET*.

From the EXT_robustness spec:

5. How should the application react to a reset context event?

RESOLVED: For this extension, the application is expected to query
the reset status until NO_ERROR is returned. If a reset is encountered,
at least one *RESET* status will be returned. Once NO_ERROR is again
encountered, the application can safely destroy the old context and
create a new one.
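
In practice the resolution above maps to a polling loop on the client
side. A minimal sketch, assuming a GLES context with EXT_robustness
enabled (and the entry point already loaded via eglGetProcAddress);
recreate_gl_context() is a hypothetical stand-in for the application's
own context teardown/re-creation code:

#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>

void check_for_reset(void)
{
   /* A reset is first reported as one of the *RESET* statuses. */
   if (glGetGraphicsResetStatusEXT() == GL_NO_ERROR)
      return; /* no reset pending */

   /* Keep polling: only once NO_ERROR is returned again is it safe
    * to destroy the old context.
    */
   while (glGetGraphicsResetStatusEXT() != GL_NO_ERROR)
      ; /* optionally yield/sleep between polls */

   recreate_gl_context(); /* hypothetical helper: destroy + recreate */
}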

Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24224>

src/gallium/drivers/iris/iris_batch.c

@@ -697,18 +697,19 @@ iris_batch_check_for_reset(struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
+   struct iris_context *ice = batch->ice;
    const struct iris_kmd_backend *backend;
-   enum pipe_reset_status status;
+   enum pipe_reset_status status = PIPE_NO_RESET;
+
+   /* Banned context was already signalled to application */
+   if (ice->context_reset_signaled)
+      return status;
 
    backend = iris_bufmgr_get_kernel_driver_backend(bufmgr);
    status = backend->batch_check_for_reset(batch);
-   if (status != PIPE_NO_RESET) {
-      /* Our context is likely banned, or at least in an unknown state.
-       * Throw it away and start with a fresh context.  Ideally this may
-       * catch the problem before our next execbuf fails with -EIO.
-       */
-      replace_kernel_ctx(batch);
-   }
+   if (status != PIPE_NO_RESET)
+      ice->context_reset_signaled = true;
 
    return status;
 }
@@ -956,6 +957,10 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
     */
    if (ret && context_or_engine_was_banned(bufmgr, ret)) {
       enum pipe_reset_status status = iris_batch_check_for_reset(batch);
+
+      if (status != PIPE_NO_RESET || ice->context_reset_signaled)
+         replace_kernel_ctx(batch);
+
       if (batch->reset->reset) {
          /* Tell gallium frontends the device is lost and it was our fault. */
          batch->reset->reset(batch->reset->data, status);

src/gallium/drivers/iris/iris_pipe.c

@@ -99,9 +99,6 @@ iris_get_device_reset_status(struct pipe_context *ctx)
     * worst status (if one was guilty, proclaim guilt).
     */
    iris_foreach_batch(ice, batch) {
-      /* This will also recreate the hardware contexts as necessary, so any
-       * future queries will show no resets.  We only want to report once.
-       */
       enum pipe_reset_status batch_reset =
          iris_batch_check_for_reset(batch);

src/gallium/drivers/iris/iris_context.h

@@ -637,6 +637,9 @@ struct iris_context {
    /** Whether the context protected (through EGL_EXT_protected_content) */
    bool protected;
 
+   /** Whether a banned context was already signalled */
+   bool context_reset_signaled;
+
    /** A device reset status callback for notifying that the GPU is hosed. */
    struct pipe_device_reset_callback reset;
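
For context, the reset callback the driver invokes in _iris_batch_flush
above is the standard gallium one. A minimal sketch of how a frontend
might install it, assuming the gallium headers; notify_app() is a
hypothetical placeholder:

#include "pipe/p_context.h"
#include "pipe/p_state.h"

static void notify_app(void *data, enum pipe_reset_status status)
{
   /* Hypothetical: surface the loss to the API layer, e.g. so a later
    * reset-status query returns a *RESET* value to the application.
    */
}

static void install_reset_callback(struct pipe_context *pctx, void *frontend)
{
   const struct pipe_device_reset_callback cb = {
      .reset = notify_app,
      .data = frontend,
   };
   pctx->set_device_reset_callback(pctx, &cb);
}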