mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
glthread: pin driver threads to the same L3 as the main thread regularly
This improves performance on my Ryzen 3900X, which has 4 L3 caches and 6 threads per L3. The best improvement is 33% if the kernel CPU scheduler doesn't move the main thread too often.

v2: pin only once in 128 batch flushes

Acked-by: Jose Fonseca <jfonseca@vmware.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7054>
This commit is contained in:
parent
d8ea509965
commit
5957b0c162
4 changed files with 38 additions and 0 deletions
|
|
@ -1326,6 +1326,8 @@ struct dd_function_table {
|
|||
void (*SetMaxShaderCompilerThreads)(struct gl_context *ctx, unsigned count);
|
||||
bool (*GetShaderProgramCompletionStatus)(struct gl_context *ctx,
|
||||
struct gl_shader_program *shprog);
|
||||
|
||||
void (*PinDriverToL3Cache)(struct gl_context *ctx, unsigned L3_cache);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "main/hash.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_thread.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
|
||||
static void
|
||||
|
|
@ -195,6 +196,25 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
|
|||
if (!next->used)
|
||||
return;
|
||||
|
||||
/* Pin threads regularly to the same Zen CCX that the main thread is
|
||||
* running on. The main thread can move between CCXs.
|
||||
*/
|
||||
if (util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 &&
|
||||
/* driver support */
|
||||
ctx->Driver.PinDriverToL3Cache &&
|
||||
++glthread->pin_thread_counter % 128 == 0) {
|
||||
int cpu = util_get_current_cpu();
|
||||
|
||||
if (cpu >= 0) {
|
||||
unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu];
|
||||
|
||||
util_set_thread_affinity(glthread->queue.threads[0],
|
||||
util_cpu_caps.L3_affinity_mask[L3_cache],
|
||||
NULL, UTIL_MAX_CPUS);
|
||||
ctx->Driver.PinDriverToL3Cache(ctx, L3_cache);
|
||||
}
|
||||
}
|
||||
|
||||
/* Debug: execute the batch immediately from this thread.
|
||||
*
|
||||
* Note that glthread_unmarshal_batch() changes the dispatch table so we'll
|
||||
|
|
|
|||
|
|
@ -134,6 +134,9 @@ struct glthread_state
|
|||
/** Whether GLThread is inside a display list generation. */
|
||||
bool inside_dlist;
|
||||
|
||||
/** For L3 cache pinning. */
|
||||
unsigned pin_thread_counter;
|
||||
|
||||
/** The ring of batches in memory. */
|
||||
struct glthread_batch batches[MARSHAL_MAX_BATCHES];
|
||||
|
||||
|
|
|
|||
|
|
@ -908,6 +908,16 @@ st_get_driver_uuid(struct gl_context *ctx, char *uuid)
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
st_pin_driver_to_l3_cache(struct gl_context *ctx, unsigned L3_cache)
|
||||
{
|
||||
struct pipe_context *pipe = st_context(ctx)->pipe;
|
||||
|
||||
pipe->set_context_param(pipe, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
|
||||
L3_cache);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
st_init_driver_functions(struct pipe_screen *screen,
|
||||
struct dd_function_table *functions)
|
||||
|
|
@ -999,6 +1009,9 @@ st_create_context(gl_api api, struct pipe_context *pipe,
|
|||
memset(&funcs, 0, sizeof(funcs));
|
||||
st_init_driver_functions(pipe->screen, &funcs);
|
||||
|
||||
if (pipe->set_context_param)
|
||||
funcs.PinDriverToL3Cache = st_pin_driver_to_l3_cache;
|
||||
|
||||
ctx = calloc(1, sizeof(struct gl_context));
|
||||
if (!ctx)
|
||||
return NULL;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue