glthread: determine global locking once every 64 batches to fix get_time perf

This mitigates a large performance degradation when the clock source is HPET instead of TSC, where reading the time (os_time_get_nano()) is very expensive. The fix is simply to call get_time less frequently: the decision whether to lock the global mutexes is now re-evaluated once every 64 batches.

Fixes: 3ed141e9 - glthread: add a heuristic to stop locking global mutexes with multiple contexts
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8910
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24168>
(cherry picked from commit 6cda08416b)
2026-05-05 18:18:06 +02:00 · 2023-07-15 08:22:37 -04:00 · 2023-07-15 08:22:37 -04:00 · eb64d60140
commit eb64d60140
parent 62e3b18ddd
3 changed files with 28 additions and 7 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -184,7 +184,7 @@
        "description": "glthread: determine global locking once every 64 batches to fix get_time perf",
        "nominated": true,
        "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": "3ed141e9d80bc2174e34afafb13b5bf07c802ef0",
        "notes": null
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -43,13 +43,8 @@
 #include "state_tracker/st_context.h"

 static void
-glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
+glthread_update_global_locking(struct gl_context *ctx)
 {
-   struct glthread_batch *batch = (struct glthread_batch*)job;
-   struct gl_context *ctx = batch->ctx;
-   unsigned pos = 0;
-   unsigned used = batch->used;
-   uint64_t *buffer = batch->buffer;
   struct gl_shared_state *shared = ctx->Shared;

   /* Determine if we should lock the global mutexes. */
@@ -102,12 +97,34 @@ glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
   }
   simple_mtx_unlock(&shared->Mutex);

+   ctx->GLThread.LockGlobalMutexes = lock_mutexes;
+}
+
+static void
+glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
+{
+   struct glthread_batch *batch = (struct glthread_batch*)job;
+   struct gl_context *ctx = batch->ctx;
+   unsigned pos = 0;
+   unsigned used = batch->used;
+   uint64_t *buffer = batch->buffer;
+   struct gl_shared_state *shared = ctx->Shared;
+
+   /* Determine once every 64 batches whether shared mutexes should be locked.
+    * We have to do this less frequently because os_time_get_nano() is very
+    * expensive if the clock source is not TSC. See:
+    *    https://gitlab.freedesktop.org/mesa/mesa/-/issues/8910
+    */
+   if (ctx->GLThread.GlobalLockUpdateBatchCounter++ % 64 == 0)
+      glthread_update_global_locking(ctx);
+
   /* Execute the GL calls. */
   _glapi_set_dispatch(ctx->Dispatch.Current);

   /* Here we lock the mutexes once globally if possible. If not, we just
    * fallback to the individual API calls doing it.
    */
+   bool lock_mutexes = ctx->GLThread.LockGlobalMutexes;
   if (lock_mutexes) {
      _mesa_HashLockMutex(shared->BufferObjects);
      ctx->BufferObjectsLocked = true;
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -273,6 +273,10 @@ struct glthread_state
   /** The last added call of the given function. */
   struct marshal_cmd_CallList *LastCallList;
   struct marshal_cmd_BindBuffer *LastBindBuffer;
+
+   /** Global mutex update info. */
+   unsigned GlobalLockUpdateBatchCounter;
+   bool LockGlobalMutexes;
 };

 void _mesa_glthread_init(struct gl_context *ctx);