zink: enable EXT_shader_object for generic precompiles

this should match the functionality of GPL, but it should also (theoretically)
have significantly less CPU overhead, so I've enabled this to be the new
default when available

currently I'm not changing any of the requirements for shader object enablement,
so this will probably only be usable on desktops

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22725>
This commit is contained in:
Mike Blumenkrantz 2023-04-03 16:35:40 -04:00 committed by Marge Bot
parent 29a62dd2ae
commit dfd39d1d9d
7 changed files with 89 additions and 45 deletions

View file

@ -3197,7 +3197,7 @@ zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const c
}
struct zink_shader_object
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj)
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
{
VkShaderModuleCreateInfo smci = {0};
VkShaderCreateInfoEXT sci = {0};
@ -3220,10 +3220,15 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
sci.codeSize = spirv->num_words * sizeof(uint32_t);
sci.pCode = spirv->words;
sci.pName = "main";
sci.setLayoutCount = zs->info.stage + 1;
VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
dsl[zs->info.stage] = zs->precompile.dsl;;
sci.pSetLayouts = dsl;
if (pg) {
sci.setLayoutCount = pg->num_dsl;
sci.pSetLayouts = pg->dsl;
} else {
sci.setLayoutCount = zs->info.stage + 1;
dsl[zs->info.stage] = zs->precompile.dsl;;
sci.pSetLayouts = dsl;
}
VkPushConstantRange pcr;
pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
pcr.offset = 0;
@ -3525,7 +3530,7 @@ invert_point_coord(nir_shader *nir)
}
static struct zink_shader_object
compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj)
compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
{
struct zink_shader_info *sinfo = &zs->sinfo;
prune_io(nir);
@ -3535,7 +3540,7 @@ compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *n
struct zink_shader_object obj;
struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
if (spirv)
obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj);
obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
/* TODO: determine if there's any reason to cache spirv output? */
if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
@ -3547,7 +3552,7 @@ compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *n
struct zink_shader_object
zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
{
struct zink_shader_info *sinfo = &zs->sinfo;
bool need_optimize = false;
@ -3739,7 +3744,7 @@ zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shad
} else if (need_optimize)
optimize_nir(nir, zs);
struct zink_shader_object obj = compile_module(screen, zs, nir, false);
struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
ralloc_free(nir);
return obj;
}
@ -3786,7 +3791,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
nir_shader *nir_clone = NULL;
if (screen->info.have_EXT_shader_object)
nir_clone = nir_shader_clone(nir, nir);
struct zink_shader_object obj = compile_module(screen, zs, nir, true);
struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
if (screen->info.have_EXT_shader_object && !zs->info.internal) {
/* always try to pre-generate a tcs in case it's needed */
if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
@ -3810,7 +3815,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
nir_fixup_deref_modes(nir_clone);
NIR_PASS_V(nir_clone, nir_remove_dead_variables, nir_var_shader_temp, NULL);
optimize_nir(nir_clone, NULL);
zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true);
zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true, NULL);
spirv_shader_delete(zs->precompile.no_psiz_obj.spirv);
zs->precompile.no_psiz_obj.spirv = NULL;
}
@ -5251,12 +5256,12 @@ zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
struct zink_shader_object
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
{
assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
/* shortcut all the nir passes since we just have to change this one word */
zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
return zink_shader_spirv_compile(screen, zs, NULL, false);
return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
}
/* creating a passthrough tcs shader that's roughly:

View file

@ -69,7 +69,7 @@ void
zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer);
/* pass very large shader key data with extra_data */
struct zink_shader_object
zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data);
zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg);
struct zink_shader_object
zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs);
struct zink_shader *
@ -85,9 +85,9 @@ void
zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader);
struct zink_shader_object
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj);
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg);
struct zink_shader_object
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices);
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg);
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret);

View file

@ -272,13 +272,25 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
{
VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
const struct zink_screen *screen = zink_screen(ctx->base.screen);
bool shaders_changed = ctx->gfx_dirty;
bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages;
if (screen->optimal_keys && !ctx->is_generated_gs_bound)
zink_gfx_program_update_optimal(ctx);
else
zink_gfx_program_update(ctx);
bool pipeline_changed = false;
if (ctx->curr_program->base.uses_shobj) {
VkPipeline pipeline = VK_NULL_HANDLE;
if (!ctx->curr_program->base.uses_shobj) {
if (screen->info.have_EXT_graphics_pipeline_library)
pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
else
pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
}
if (pipeline) {
pipeline_changed = prev_pipeline != pipeline;
if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
ctx->shobj_draw = false;
} else {
if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
VkShaderStageFlagBits stages[] = {
VK_SHADER_STAGE_VERTEX_BIT,
@ -290,19 +302,8 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
/* always rebind all stages */
VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
pipeline_changed = false;
}
ctx->shobj_draw = true;
} else {
VkPipeline pipeline;
if (screen->info.have_EXT_graphics_pipeline_library)
pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
else
pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
pipeline_changed = prev_pipeline != pipeline;
if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
ctx->shobj_draw = false;
}
return pipeline_changed;
}

View file

@ -26,6 +26,7 @@
#include "zink_pipeline.h"
#include "zink_compiler.h"
#include "nir_to_spirv/nir_to_spirv.h"
#include "zink_context.h"
#include "zink_program.h"
#include "zink_render_pass.h"
@ -375,6 +376,7 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
}
VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT];
VkShaderModuleCreateInfo smci[ZINK_GFX_SHADER_COUNT] = {0};
uint32_t num_stages = 0;
for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
if (!prog->shaders[i])
@ -383,8 +385,15 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
VkPipelineShaderStageCreateInfo stage = {0};
stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage.stage = mesa_to_vk_shader_stage(i);
stage.module = objs[i].mod;
stage.pName = "main";
if (objs[i].mod) {
stage.module = objs[i].mod;
} else {
smci[i].sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
stage.pNext = &smci[i];
smci[i].codeSize = objs[i].spirv->num_words * sizeof(uint32_t);
smci[i].pCode = objs[i].spirv->words;
}
shader_stages[num_stages++] = stage;
}
assert(num_stages > 0);

View file

@ -149,15 +149,15 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
assert(ctx); //TODO async
zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices);
zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
} else {
zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage]);
zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
}
if (!zm->obj.mod) {
FREE(zm);
return NULL;
}
zm->shobj = false;
zm->shobj = prog->base.uses_shobj;
zm->num_uniforms = inline_size;
if (!is_nongenerated_tcs) {
zm->key_size = key->size;
@ -267,16 +267,16 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
patch_vertices = tcs->patch_vertices;
}
zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices);
zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
} else {
zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
(struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL);
zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
(struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
}
if (!zm->obj.mod) {
FREE(zm);
return NULL;
}
zm->shobj = false;
zm->shobj = prog->base.uses_shobj;
/* non-generated tcs won't use the shader key */
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
if (key && !is_nongenerated_tcs) {
@ -396,6 +396,7 @@ update_gfx_shader_modules(struct zink_context *ctx,
hash_changed = true;
default_variants &= zm->default_variant;
prog->objs[i] = zm->obj;
prog->objects[i] = zm->obj.obj;
prog->module_hash[i] = zm->hash;
if (has_inline) {
if (zm->num_uniforms)
@ -436,7 +437,8 @@ generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *scree
inline_size, nonseamless_size,
screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
state->modules[i] = zm->obj.mod;
prog->objs[i] = zm->obj ;
prog->objs[i] = zm->obj;
prog->objects[i] = zm->obj.obj;
prog->module_hash[i] = zm->hash;
if (zm->num_uniforms)
prog->inline_variants |= BITFIELD_BIT(i);
@ -464,6 +466,7 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree
struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
prog->objs[i] = zm->obj;
prog->objects[i] = zm->obj.obj;
}
p_atomic_dec(&prog->base.reference.count);
@ -659,6 +662,7 @@ update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_progr
bool changed = prog->objs[pstage].mod != zm->obj.mod;
prog->objs[pstage] = zm->obj;
prog->objects[pstage] = zm->obj.obj;
return changed;
}
@ -788,13 +792,29 @@ optimized_compile_job(void *data, void *gdata, int thread_index)
}
}
/* Queue job that builds the optimized pipeline for a pipeline cache entry
 * belonging to a program that uses shader objects.
 *
 * data:         the struct zink_gfx_pipeline_cache_entry to fill in
 * gdata:        the struct zink_screen
 * thread_index: not used by this job
 *
 * Each stage is handed to zink_create_gfx_pipeline() with a null module
 * handle and only the SPIR-V pointer taken from the entry's shobjs[] snapshot.
 * The result is stored in the entry's `pipeline` member.
 */
static void
optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_screen *screen = gdata;
   struct zink_gfx_pipeline_cache_entry *entry = data;
   struct zink_shader_object spirv_objs[ZINK_GFX_SHADER_COUNT];

   /* no shader module exists for these stages: clear the handle and pass the spirv along */
   for (unsigned stage = 0; stage < ZINK_GFX_SHADER_COUNT; stage++) {
      spirv_objs[stage].mod = VK_NULL_HANDLE;
      spirv_objs[stage].spirv = entry->shobjs[stage].spirv;
   }

   /* the pipeline is stored directly; there is no unoptimized_pipeline to juggle here */
   entry->pipeline = zink_create_gfx_pipeline(screen, entry->prog, spirv_objs, &entry->state, NULL,
                                              zink_primitive_topology(entry->state.gfx_prim_mode), true);
}
void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
{
struct zink_screen *screen = zink_screen(ctx->base.screen);
if (screen->driver_workarounds.disable_optimized_compile)
return;
util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
}
static void
@ -853,7 +873,7 @@ update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *c
return;
}
zm->shobj = false;
zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL);
zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
if (!zm->obj.spirv) {
FREE(zm);
return;
@ -1303,7 +1323,7 @@ precompile_compute_job(void *data, void *gdata, int thread_index)
comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
assert(comp->module);
comp->module->shobj = false;
comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL);
comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
/* comp->nir will be freed by zink_shader_compile */
comp->nir = NULL;
assert(comp->module->obj.spirv);
@ -2033,9 +2053,11 @@ precompile_job(void *data, void *gdata, int thread_index)
state.optimal_key = state.shader_keys_optimal.key.val;
generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
zink_screen_get_pipeline_cache(screen, &prog->base, true);
simple_mtx_lock(&prog->libs->lock);
zink_create_pipeline_lib(screen, prog, &state);
simple_mtx_unlock(&prog->libs->lock);
if (!screen->info.have_EXT_shader_object) {
simple_mtx_lock(&prog->libs->lock);
zink_create_pipeline_lib(screen, prog, &state);
simple_mtx_unlock(&prog->libs->lock);
}
zink_screen_update_pipeline_cache(screen, &prog->base, true);
}
@ -2105,6 +2127,8 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
print_pipeline_stats(screen, pipeline);
} else {
if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
}
}
@ -2207,7 +2231,8 @@ zink_program_init(struct zink_context *ctx)
STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library || zink_debug & ZINK_DEBUG_SHADERDB)
struct zink_screen *screen = zink_screen(ctx->base.screen);
if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
ctx->base.link_shader = zink_link_gfx_shader;
}

View file

@ -186,7 +186,10 @@ zink_get_gfx_pipeline(struct zink_context *ctx,
/* init the optimized background compile fence */
util_queue_fence_init(&pc_entry->fence);
entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, pc_entry);
if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) {
if (prog->base.uses_shobj && !prog->is_separable) {
memcpy(pc_entry->shobjs, prog->objs, sizeof(prog->objs));
zink_gfx_program_compile_queue(ctx, pc_entry);
} else if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) {
/* this is the graphics pipeline library path: find/construct all partial pipelines */
simple_mtx_lock(&prog->libs->lock);
struct set_entry *he = _mesa_set_search(&prog->libs->libs, &ctx->gfx_pipeline_state.optimal_key);

View file

@ -1028,6 +1028,7 @@ struct zink_gfx_pipeline_cache_entry {
struct zink_gfx_output_key *okey;
VkPipeline unoptimized_pipeline;
} gpl;
struct zink_shader_object shobjs[ZINK_GFX_SHADER_COUNT];
};
};