glthread: determine global locking once every 64 batches to fix get_time perf

This mitigates a large performance degradation when the clock source is HPET instead of TSC.
It does so by calling os_time_get_nano() less frequently: once every 64 batches instead of once per batch.

Fixes: 3ed141e9 - glthread: add a heuristic to stop locking global mutexes with multiple contexts
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8910

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24168>
(cherry picked from commit 6cda08416b)
This commit is contained in:
Marek Olšák 2023-07-15 08:22:37 -04:00 committed by Dylan Baker
parent 62e3b18ddd
commit eb64d60140
3 changed files with 28 additions and 7 deletions

View file

@ -184,7 +184,7 @@
"description": "glthread: determine global locking once every 64 batches to fix get_time perf",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "3ed141e9d80bc2174e34afafb13b5bf07c802ef0",
"notes": null

View file

@ -43,13 +43,8 @@
#include "state_tracker/st_context.h"
static void
glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
glthread_update_global_locking(struct gl_context *ctx)
{
struct glthread_batch *batch = (struct glthread_batch*)job;
struct gl_context *ctx = batch->ctx;
unsigned pos = 0;
unsigned used = batch->used;
uint64_t *buffer = batch->buffer;
struct gl_shared_state *shared = ctx->Shared;
/* Determine if we should lock the global mutexes. */
@ -102,12 +97,34 @@ glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
}
simple_mtx_unlock(&shared->Mutex);
ctx->GLThread.LockGlobalMutexes = lock_mutexes;
}
static void
glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
{
struct glthread_batch *batch = (struct glthread_batch*)job;
struct gl_context *ctx = batch->ctx;
unsigned pos = 0;
unsigned used = batch->used;
uint64_t *buffer = batch->buffer;
struct gl_shared_state *shared = ctx->Shared;
/* Determine once every 64 batches whether shared mutexes should be locked.
* We have to do this less frequently because os_time_get_nano() is very
* expensive if the clock source is not TSC. See:
* https://gitlab.freedesktop.org/mesa/mesa/-/issues/8910
*/
if (ctx->GLThread.GlobalLockUpdateBatchCounter++ % 64 == 0)
glthread_update_global_locking(ctx);
/* Execute the GL calls. */
_glapi_set_dispatch(ctx->Dispatch.Current);
/* Here we lock the mutexes once globally if possible. If not, we just
* fall back to the individual API calls doing it.
*/
bool lock_mutexes = ctx->GLThread.LockGlobalMutexes;
if (lock_mutexes) {
_mesa_HashLockMutex(shared->BufferObjects);
ctx->BufferObjectsLocked = true;

View file

@ -273,6 +273,10 @@ struct glthread_state
/** The last added call of the given function. */
struct marshal_cmd_CallList *LastCallList;
struct marshal_cmd_BindBuffer *LastBindBuffer;
/** Global mutex update info. */
unsigned GlobalLockUpdateBatchCounter;
bool LockGlobalMutexes;
};
void _mesa_glthread_init(struct gl_context *ctx);