zink: implement async gfx precompile

the pipe_context::link_shader hook is called when shaders are
linked into a program by the application

by leveraging this hook, the existing graphics pipeline library
support can be used to implement precompilation: a partial pipeline
containing only the shader stages is created when the shaders are
linked, and the vertex input and fragment output stages are then
added dynamically using the fast-link feature

if all goes well, and if the vulkan driver's fast-linking is
truly fast, the full pipeline should be dynamically combined
in time to avoid stuttering, and an optimized variant will be
queued for async compile to be used the next time the pipeline
triggers a draw

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18961>
This commit is contained in:
Mike Blumenkrantz 2022-09-22 16:42:19 -04:00 committed by Marge Bot
parent aed4e716d0
commit 41ffb15de5
6 changed files with 106 additions and 6 deletions

View file

@ -3387,6 +3387,7 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
_mesa_hash_table_remove(ht, he); _mesa_hash_table_remove(ht, he);
prog->base.removed = true; prog->base.removed = true;
simple_mtx_unlock(&prog->ctx->program_lock[idx]); simple_mtx_unlock(&prog->ctx->program_lock[idx]);
util_queue_fence_wait(&prog->base.cache_fence);
} }
if (stage != MESA_SHADER_TESS_CTRL || !shader->is_generated) { if (stage != MESA_SHADER_TESS_CTRL || !shader->is_generated) {
prog->shaders[stage] = NULL; prog->shaders[stage] = NULL;

View file

@ -607,6 +607,8 @@ ALWAYS_INLINE static void
update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage) update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
{ {
struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_screen *screen = zink_screen(ctx->base.screen);
if (screen->info.have_EXT_graphics_pipeline_library)
util_queue_fence_wait(&prog->base.cache_fence);
struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
if (!zm) if (!zm)
zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
@ -674,6 +676,28 @@ zink_gfx_program_update_optimal(struct zink_context *ctx)
ctx->last_vertex_stage_dirty = false; ctx->last_vertex_stage_dirty = false;
} }
/**
 * Queue job that compiles a replacement pipeline for a gfx pipeline cache entry.
 *
 * @param data          the struct zink_gfx_pipeline_cache_entry to compile for
 * @param gdata         interpreted as the struct zink_screen
 * @param thread_index  unused
 *
 * When the entry has a library key (gkey), the pipeline is rebuilt from the
 * entry's input/library/output pipelines; otherwise it is built from the
 * pipeline state saved in the entry. If creation succeeds, the entry's
 * previous pipeline is kept in unoptimized_pipeline and the new pipeline
 * takes its place. If creation fails, the entry is left untouched.
 */
static void
optimized_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_screen *screen = gdata;
   struct zink_gfx_pipeline_cache_entry *entry = data;
   VkPipeline new_pipeline;

   if (!entry->gkey) {
      new_pipeline = zink_create_gfx_pipeline(screen, entry->prog, &entry->state,
                                              entry->state.element_state->binding_map,
                                              zink_primitive_topology(entry->state.gfx_prim_mode),
                                              true);
   } else {
      /* NOTE(review): the final argument is false here but true at the
       * synchronous call site in zink_get_gfx_pipeline; confirm this is intended
       */
      new_pipeline = zink_create_gfx_pipeline_combined(screen, entry->prog,
                                                       entry->ikey->pipeline,
                                                       entry->gkey->pipeline,
                                                       entry->okey->pipeline,
                                                       false);
   }

   if (!new_pipeline)
      return;

   /* keep the old handle so it can still be destroyed with the entry */
   entry->unoptimized_pipeline = entry->pipeline;
   entry->pipeline = new_pipeline;
}
void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
{
util_queue_add_job(&zink_screen(ctx->base.screen)->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
}
static void static void
update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp) update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
{ {
@ -1149,7 +1173,9 @@ zink_destroy_gfx_program(struct zink_screen *screen,
hash_table_foreach(&prog->pipelines[r][i], entry) { hash_table_foreach(&prog->pipelines[r][i], entry) {
struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
util_queue_fence_wait(&pc_entry->fence);
VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
VKSCR(DestroyPipeline)(screen->dev, pc_entry->unoptimized_pipeline, NULL);
free(pc_entry); free(pc_entry);
} }
} }
@ -1540,6 +1566,59 @@ zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *pr
return gkey; return gkey;
} }
static void
precompile_job(void *data, void *gdata, int thread_index)
{
struct zink_screen *screen = gdata;
struct zink_gfx_program *prog = data;
struct zink_gfx_pipeline_state state = {0};
state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
zink_screen_get_pipeline_cache(screen, &prog->base, true);
zink_create_pipeline_lib(screen, prog, &state);
zink_screen_update_pipeline_cache(screen, &prog->base, true);
}
/**
 * pipe_context::link_shader hook: create a gfx program for a newly linked set
 * of shaders and queue its precompile on the screen's cache_get_thread queue.
 *
 * @param pctx     the pipe context
 * @param shaders  array of shader pointers indexed by gl_shader_stage; entries
 *                 are treated as struct zink_shader pointers and may be NULL
 *
 * Nothing is done when a compute shader is present, when the vertex or
 * fragment stage is missing, when tessellation is used without a tess eval
 * shader, or when a program for these shaders is already in the cache.
 */
static void
zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_shader **zshaders = (struct zink_shader **)shaders;
   if (shaders[MESA_SHADER_COMPUTE])
      return;
   /* can't precompile fixedfunc */
   if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT])
      return;

   /* the program hash is the xor of the hashes of all present stages */
   unsigned hash = 0;
   unsigned shader_stages = 0;
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      if (zshaders[i]) {
         hash ^= zshaders[i]->hash;
         shader_stages |= BITFIELD_BIT(i);
      }
   }

   unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
   unsigned tess = shader_stages & tess_stages;
   /* can't do fixedfunc tes either */
   if (tess && !shaders[MESA_SHADER_TESS_EVAL])
      return;

   /* the cache table and its lock share one index: compute it once */
   const unsigned cache_idx = zink_program_cache_stages(shader_stages);
   struct hash_table *ht = &ctx->program_cache[cache_idx];
   simple_mtx_lock(&ctx->program_lock[cache_idx]);
   /* link can be called repeatedly with the same shaders: ignore */
   if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
      simple_mtx_unlock(&ctx->program_lock[cache_idx]);
      return;
   }

   /* NOTE(review): the meaning of the literal 3 is not visible here; confirm against zink_create_gfx_program */
   struct zink_gfx_program *prog = zink_create_gfx_program(ctx, zshaders, 3);
   if (!prog) {
      /* precompile is only an optimization: skip it rather than dereference
       * a NULL program while holding the lock
       */
      simple_mtx_unlock(&ctx->program_lock[cache_idx]);
      return;
   }
   u_foreach_bit(i, shader_stages)
      assert(prog->shaders[i]);
   _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
   simple_mtx_unlock(&ctx->program_lock[cache_idx]);

   /* compile asynchronously; completion is signalled on prog->base.cache_fence */
   util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
}
void void
zink_program_init(struct zink_context *ctx) zink_program_init(struct zink_context *ctx)
{ {
@ -1585,6 +1664,9 @@ zink_program_init(struct zink_context *ctx)
offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input)); offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));
STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t)); STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library)
ctx->base.link_shader = zink_link_gfx_shader;
} }
bool bool

View file

@ -126,6 +126,8 @@ uint32_t hash_gfx_output_ds3(const void *key);
uint32_t hash_gfx_input(const void *key); uint32_t hash_gfx_input(const void *key);
uint32_t hash_gfx_input_dynamic(const void *key); uint32_t hash_gfx_input_dynamic(const void *key);
void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry);
static inline unsigned static inline unsigned
get_primtype_idx(enum pipe_prim_type mode) get_primtype_idx(enum pipe_prim_type mode)

View file

@ -221,6 +221,8 @@ zink_get_gfx_pipeline(struct zink_context *ctx,
if (!pc_entry) if (!pc_entry)
return VK_NULL_HANDLE; return VK_NULL_HANDLE;
memcpy(&pc_entry->state, state, sizeof(*state)); memcpy(&pc_entry->state, state, sizeof(*state));
pc_entry->prog = prog;
util_queue_fence_init(&pc_entry->fence);
entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, pc_entry); entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, pc_entry);
if (HAVE_LIB && if (HAVE_LIB &&
/* TODO: if there's ever a dynamic render extension with input attachments */ /* TODO: if there's ever a dynamic render extension with input attachments */
@ -247,13 +249,16 @@ zink_get_gfx_pipeline(struct zink_context *ctx,
pc_entry->okey = okey; pc_entry->okey = okey;
pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey->pipeline, gkey->pipeline, okey->pipeline, true); pipeline = zink_create_gfx_pipeline_combined(screen, prog, ikey->pipeline, gkey->pipeline, okey->pipeline, true);
} else { } else {
pipeline = zink_create_gfx_pipeline(screen, prog, state, state->element_state->binding_map, vkmode, true); /* optimize by default only when expecting precompiles in order to reduce stuttering */
pipeline = zink_create_gfx_pipeline(screen, prog, state, state->element_state->binding_map, vkmode, !HAVE_LIB);
} }
if (pipeline == VK_NULL_HANDLE) if (pipeline == VK_NULL_HANDLE)
return VK_NULL_HANDLE; return VK_NULL_HANDLE;
zink_screen_update_pipeline_cache(screen, &prog->base, false); zink_screen_update_pipeline_cache(screen, &prog->base, false);
pc_entry->pipeline = pipeline; pc_entry->pipeline = pipeline;
if (HAVE_LIB)
zink_gfx_program_compile_queue(ctx, pc_entry);
} }
struct zink_gfx_pipeline_cache_entry *cache_entry = (struct zink_gfx_pipeline_cache_entry *)entry->data; struct zink_gfx_pipeline_cache_entry *cache_entry = (struct zink_gfx_pipeline_cache_entry *)entry->data;

View file

@ -176,11 +176,18 @@ zink_set_max_shader_compiler_threads(struct pipe_screen *pscreen, unsigned max_t
static bool static bool
zink_is_parallel_shader_compilation_finished(struct pipe_screen *screen, void *shader, enum pipe_shader_type shader_type) zink_is_parallel_shader_compilation_finished(struct pipe_screen *screen, void *shader, enum pipe_shader_type shader_type)
{ {
/* not supported yet */ if (shader_type == MESA_SHADER_COMPUTE) {
if (shader_type != MESA_SHADER_COMPUTE) struct zink_program *pg = shader;
return true; return !pg->can_precompile || util_queue_fence_is_signalled(&pg->cache_fence);
struct zink_program *pg = shader; }
return !pg->can_precompile || util_queue_fence_is_signalled(&pg->cache_fence);
struct zink_shader *zs = shader;
bool finished = true;
set_foreach(zs->programs, entry) {
struct zink_gfx_program *prog = (void*)entry->key;
finished &= util_queue_fence_is_signalled(&prog->base.cache_fence);
}
return finished;
} }
static VkDeviceSize static VkDeviceSize

View file

@ -830,9 +830,12 @@ struct zink_gfx_pipeline_cache_entry {
struct zink_gfx_pipeline_state state; struct zink_gfx_pipeline_state state;
VkPipeline pipeline; VkPipeline pipeline;
/* GPL only */ /* GPL only */
struct util_queue_fence fence;
struct zink_gfx_input_key *ikey; struct zink_gfx_input_key *ikey;
struct zink_gfx_library_key *gkey; struct zink_gfx_library_key *gkey;
struct zink_gfx_output_key *okey; struct zink_gfx_output_key *okey;
struct zink_gfx_program *prog;
VkPipeline unoptimized_pipeline;
}; };
struct zink_gfx_program { struct zink_gfx_program {