zink: use EXT_shader_object to implement generic separate shader precompile

This adds precompile for all separate shader stages (now including tcs, tes,
and geom) using separate shaders, which should eliminate stuttering for games
that use separate shaders (e.g., Tomb Raider).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22671>
This commit is contained in:
Mike Blumenkrantz 2023-03-31 17:23:34 -04:00 committed by Marge Bot
parent 234f9953a2
commit 4c47d83051
5 changed files with 38 additions and 12 deletions

View file

@ -3215,14 +3215,14 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT; sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
sci.stage = mesa_to_vk_shader_stage(zs->info.stage); sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT) if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT; sci.nextStage = VK_SHADER_STAGE_ALL_GRAPHICS & ~VK_SHADER_STAGE_VERTEX_BIT;
sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT; sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
sci.codeSize = spirv->num_words * sizeof(uint32_t); sci.codeSize = spirv->num_words * sizeof(uint32_t);
sci.pCode = spirv->words; sci.pCode = spirv->words;
sci.pName = "main"; sci.pName = "main";
sci.setLayoutCount = 2; sci.setLayoutCount = zs->info.stage + 1;
VkDescriptorSetLayout dsl[2] = {0}; VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl; dsl[zs->info.stage] = zs->precompile.dsl;;
sci.pSetLayouts = dsl; sci.pSetLayouts = dsl;
VkPushConstantRange pcr; VkPushConstantRange pcr;
pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
@ -3748,7 +3748,10 @@ struct zink_shader_object
zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs) zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
{ {
nir_shader *nir = zink_shader_deserialize(screen, zs); nir_shader *nir = zink_shader_deserialize(screen, zs);
int set = nir->info.stage == MESA_SHADER_FRAGMENT; /* TODO: maybe compile multiple variants for different set counts for compact mode? */
int set = zs->info.stage == MESA_SHADER_FRAGMENT;
if (screen->info.have_EXT_shader_object)
set = zs->info.stage;
unsigned offsets[4]; unsigned offsets[4];
zink_descriptor_shader_get_binding_offsets(zs, offsets); zink_descriptor_shader_get_binding_offsets(zs, offsets);
nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) { nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
@ -3779,7 +3782,17 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
} }
optimize_nir(nir, zs); optimize_nir(nir, zs);
zink_descriptor_shader_init(screen, zs); zink_descriptor_shader_init(screen, zs);
zs->sinfo.last_vertex = zs->sinfo.have_xfb;
struct zink_shader_object obj = compile_module(screen, zs, nir, true); struct zink_shader_object obj = compile_module(screen, zs, nir, true);
/* always try to pre-generate a tcs in case it's needed */
if (zs->info.stage == MESA_SHADER_TESS_EVAL && screen->info.have_EXT_shader_object && !zs->info.internal) {
nir_shader *nir_tcs = NULL;
/* use max pcp for compat */
zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir, 32, &nir_tcs);
nir_tcs->info.separate_shader = true;
zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
ralloc_free(nir_tcs);
}
ralloc_free(nir); ralloc_free(nir);
return obj; return obj;
} }

View file

@ -1125,7 +1125,8 @@ update_separable(struct zink_context *ctx, struct zink_program *pg)
} }
bs->dd.cur_db_offset[use_buffer] = bs->dd.db_offset; bs->dd.cur_db_offset[use_buffer] = bs->dd.db_offset;
bs->dd.db_offset += zs->precompile.db_size; bs->dd.db_offset += zs->precompile.db_size;
int set_idx = j == MESA_SHADER_FRAGMENT; /* TODO: maybe compile multiple variants for different set counts for compact mode? */
int set_idx = screen->info.have_EXT_shader_object ? j : j == MESA_SHADER_FRAGMENT;
VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pg->layout, set_idx, 1, &use_buffer, &offset); VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pg->layout, set_idx, 1, &use_buffer, &offset);
} }
} }

View file

@ -290,6 +290,7 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
/* always rebind all stages */ /* always rebind all stages */
VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects); VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE); VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
pipeline_changed = false;
} }
ctx->shobj_draw = true; ctx->shobj_draw = true;
} else { } else {

View file

@ -1132,13 +1132,11 @@ struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch) create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
{ {
struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_screen *screen = zink_screen(ctx->base.screen);
unsigned shader_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT);
bool is_separate = true; bool is_separate = true;
for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
is_separate &= !stages[i] || stages[i]->info.separate_shader; is_separate &= !stages[i] || stages[i]->info.separate_shader;
/* filter cases that need real pipelines */ /* filter cases that need real pipelines */
if (ctx->shader_stages != shader_stages || if (!is_separate ||
!is_separate ||
/* TODO: maybe try variants? grimace */ /* TODO: maybe try variants? grimace */
!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
!zink_can_use_pipeline_libs(ctx)) !zink_can_use_pipeline_libs(ctx))
@ -1164,6 +1162,11 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
memcpy(prog->shaders, stages, sizeof(prog->shaders)); memcpy(prog->shaders, stages, sizeof(prog->shaders));
prog->last_vertex_stage = ctx->last_vertex_stage; prog->last_vertex_stage = ctx->last_vertex_stage;
if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_VERTEX]->non_fs.generated_tcs;
prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
}
if (!screen->info.have_EXT_shader_object) { if (!screen->info.have_EXT_shader_object) {
prog->libs = create_lib_cache(prog, false); prog->libs = create_lib_cache(prog, false);
/* this libs cache is owned by the program */ /* this libs cache is owned by the program */
@ -1177,7 +1180,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
_mesa_set_add(prog->shaders[i]->programs, prog); _mesa_set_add(prog->shaders[i]->programs, prog);
simple_mtx_unlock(&prog->shaders[i]->lock); simple_mtx_unlock(&prog->shaders[i]->lock);
if (screen->info.have_EXT_shader_object) { if (screen->info.have_EXT_shader_object) {
prog->objects[i] = stages[i]->precompile.obj.obj; prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
} }
refs++; refs++;
} }
@ -1200,7 +1203,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl) if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl)
continue; continue;
int idx = !i ? 0 : 1; int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1;
prog->base.dd.binding_usage |= BITFIELD_BIT(idx); prog->base.dd.binding_usage |= BITFIELD_BIT(idx);
prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl; prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl;
/* guarantee a null dsl if previous stages don't have descriptors */ /* guarantee a null dsl if previous stages don't have descriptors */
@ -2119,7 +2122,8 @@ zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader
void *ret = zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output); void *ret = zink_shader_create(zink_screen(pctx->screen), nir, &shader->stream_output);
if (nir->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && if (nir->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
(screen->info.have_EXT_graphics_pipeline_library && (nir->info.stage == MESA_SHADER_FRAGMENT || nir->info.stage == MESA_SHADER_VERTEX))) { (screen->info.have_EXT_shader_object ||
(screen->info.have_EXT_graphics_pipeline_library && (nir->info.stage == MESA_SHADER_FRAGMENT || nir->info.stage == MESA_SHADER_VERTEX)))) {
struct zink_shader *zs = ret; struct zink_shader *zs = ret;
/* sample shading can't precompile */ /* sample shading can't precompile */
if (nir->info.stage != MESA_SHADER_FRAGMENT || !nir->info.fs.uses_sample_shading) if (nir->info.stage != MESA_SHADER_FRAGMENT || !nir->info.fs.uses_sample_shading)

View file

@ -280,6 +280,9 @@ disk_cache_init(struct zink_screen *screen)
*/ */
_mesa_sha1_update(&ctx, &screen->driconf, sizeof(screen->driconf)); _mesa_sha1_update(&ctx, &screen->driconf, sizeof(screen->driconf));
/* EXT_shader_object causes different descriptor layouts for separate shaders */
_mesa_sha1_update(&ctx, &screen->info.have_EXT_shader_object, sizeof(screen->info.have_EXT_shader_object));
/* Finish the sha1 and format it as text. */ /* Finish the sha1 and format it as text. */
unsigned char sha1[20]; unsigned char sha1[20];
_mesa_sha1_final(&ctx, sha1); _mesa_sha1_final(&ctx, sha1);
@ -2404,6 +2407,10 @@ init_driver_workarounds(struct zink_screen *screen)
default: default:
break; break;
} }
/* TODO: maybe compile multiple variants for different set counts for compact mode? */
if (screen->info.props.limits.maxBoundDescriptorSets < ZINK_DESCRIPTOR_ALL_TYPES ||
zink_debug & ZINK_DEBUG_COMPACT)
screen->info.have_EXT_shader_object = false;
if (screen->info.line_rast_feats.stippledRectangularLines && if (screen->info.line_rast_feats.stippledRectangularLines &&
screen->info.line_rast_feats.stippledBresenhamLines && screen->info.line_rast_feats.stippledBresenhamLines &&
screen->info.line_rast_feats.stippledSmoothLines && screen->info.line_rast_feats.stippledSmoothLines &&