asahi: Do not sync against our own queue

We previously introduced cross-context sync points to make ordering work
with multiple queues. Unfortunately, that also adds a CPU round trip in
the kernel when a single context flushes and then keeps submitting,
since it introduces a sync against itself. That's pointless.

To fix this without introducing races, on flush we check the previous
sync point. If it's foreign, we record it, and we also track our own
last flushed sync point. Then, when we are about to wait on our own
last flush sync point, we wait on the recorded foreign one instead. A
later foreign sync makes the equality check fail, so future submits
from this queue sync against the most up-to-date point, and the next
flush records it as the last known foreign sync point for this queue
(and keeps flushing against it until another foreign queue flushes
again).

Fixes a glmark2 perf regression (particularly with `build` and similar
high-FPS tests).

Signed-off-by: Asahi Lina <lina@asahilina.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30633>
Author: Asahi Lina 2024-07-27 23:45:10 +09:00
Committer: Alyssa Rosenzweig
parent c7994a2955
commit 54cec6ae30
3 changed files with 35 additions and 1 deletion

@@ -763,6 +763,16 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
    struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *));

    uint64_t wait_seqid = p_atomic_read(&screen->flush_wait_seqid);
+
+   /* Elide syncing against our own queue */
+   if (wait_seqid && wait_seqid == ctx->flush_my_seqid) {
+      batch_debug(batch,
+                  "Wait sync point %" PRIu64 " is ours, waiting on %" PRIu64
+                  " instead",
+                  wait_seqid, ctx->flush_other_seqid);
+      wait_seqid = ctx->flush_other_seqid;
+   }
+
    uint64_t seqid = p_atomic_inc_return(&screen->flush_cur_seqid);
    assert(seqid > wait_seqid);

@@ -1562,7 +1562,8 @@ agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
    agx_flush_all(ctx, "Gallium flush");

-   if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC))) {
+   if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC)) &&
+       ctx->flush_last_seqid) {
       /* Ensure other contexts in this screen serialize against the last
        * submission (and all prior submissions).
        */
@@ -1580,6 +1581,27 @@ agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
        */
       simple_mtx_unlock(&screen->flush_seqid_lock);

+      /* Optimization: Avoid serializing against our own queue by
+       * recording the last seen foreign seqid when flushing, and our own
+       * flush seqid. If we then try to sync against our own seqid, we'll
+       * instead sync against the last possible foreign one. This is *not*
+       * the `val` we got above, because another context might flush with a
+       * seqid between `val` and `flush_last_seqid` (which would not update
+       * `flush_wait_seqid` per the logic above). This is somewhat
+       * conservative: it means that if *any* foreign context flushes, then
+       * on next flush of this context we will start waiting for *all*
+       * prior submits on *all* contexts (even if unflushed) at that point,
+       * including any local submissions prior to the latest one. That's
+       * probably fine, it creates a one-time "wait for the second-previous
+       * batch" wait on this queue but that still allows for at least
+       * the previous batch to pipeline on the GPU and it's one-time
+       * until another foreign flush happens. Phew.
+       */
+      if (val && val != ctx->flush_my_seqid)
+         ctx->flush_other_seqid = ctx->flush_last_seqid - 1;
+
+      ctx->flush_my_seqid = ctx->flush_last_seqid;
    }

    /* At this point all pending work has been submitted. Since jobs are

@@ -706,6 +706,8 @@ struct agx_context {
    int in_sync_fd;
    uint32_t in_sync_obj;
    uint64_t flush_last_seqid;
+   uint64_t flush_my_seqid;
+   uint64_t flush_other_seqid;

    struct agx_scratch scratch_vs;
    struct agx_scratch scratch_fs;