zink: enable EXT_shader_object for generic precompiles

This should match the functionality of GPL (graphics pipeline library), but it should also (theoretically) have significantly less CPU overhead, so I've enabled this to be the new default when available. Currently I'm not changing any of the requirements for shader object enablement, so this will probably only be usable on desktops. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22725>
2026-05-05 00:58:05 +02:00 · 2023-04-03 16:35:40 -04:00 · 2023-04-03 16:35:40 -04:00 · dfd39d1d9d
commit dfd39d1d9d
parent 29a62dd2ae
7 changed files with 89 additions and 45 deletions
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@ -3197,7 +3197,7 @@ zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const c
 }

 struct zink_shader_object
-zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj)
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
 {
   VkShaderModuleCreateInfo smci = {0};
   VkShaderCreateInfoEXT sci = {0};
@ -3220,10 +3220,15 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
   sci.codeSize = spirv->num_words * sizeof(uint32_t);
   sci.pCode = spirv->words;
   sci.pName = "main";
-   sci.setLayoutCount = zs->info.stage + 1;
   VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
-   dsl[zs->info.stage] = zs->precompile.dsl;;
-   sci.pSetLayouts = dsl;
+   if (pg) {
+      sci.setLayoutCount = pg->num_dsl;
+      sci.pSetLayouts = pg->dsl;
+   } else {
+      sci.setLayoutCount = zs->info.stage + 1;
+      dsl[zs->info.stage] = zs->precompile.dsl;;
+      sci.pSetLayouts = dsl;
+   }
   VkPushConstantRange pcr;
   pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
   pcr.offset = 0;
@ -3525,7 +3530,7 @@ invert_point_coord(nir_shader *nir)
 }

 static struct zink_shader_object
-compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj)
+compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
 {
   struct zink_shader_info *sinfo = &zs->sinfo;
   prune_io(nir);
@ -3535,7 +3540,7 @@ compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *n
   struct zink_shader_object obj;
   struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
   if (spirv)
-      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj);
+      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);

   /* TODO: determine if there's any reason to cache spirv output? */
   if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
@ -3547,7 +3552,7 @@ compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *n

 struct zink_shader_object
 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
-                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
+                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
 {
   struct zink_shader_info *sinfo = &zs->sinfo;
   bool need_optimize = false;
@ -3739,7 +3744,7 @@ zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shad
   } else if (need_optimize)
      optimize_nir(nir, zs);
   
-   struct zink_shader_object obj = compile_module(screen, zs, nir, false);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
   ralloc_free(nir);
   return obj;
 }
@ -3786,7 +3791,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
   nir_shader *nir_clone = NULL;
   if (screen->info.have_EXT_shader_object)
      nir_clone = nir_shader_clone(nir, nir);
-   struct zink_shader_object obj = compile_module(screen, zs, nir, true);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
   if (screen->info.have_EXT_shader_object && !zs->info.internal) {
      /* always try to pre-generate a tcs in case it's needed */
      if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
@ -3810,7 +3815,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
            nir_fixup_deref_modes(nir_clone);
            NIR_PASS_V(nir_clone, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir_clone, NULL);
-            zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true);
+            zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true, NULL);
            spirv_shader_delete(zs->precompile.no_psiz_obj.spirv);
            zs->precompile.no_psiz_obj.spirv = NULL;
         }
@ -5251,12 +5256,12 @@ zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)


 struct zink_shader_object
-zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
 {
   assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
   /* shortcut all the nir passes since we just have to change this one word */
   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
-   return zink_shader_spirv_compile(screen, zs, NULL, false);
+   return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
 }

 /* creating a passthrough tcs shader that's roughly:
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@ -69,7 +69,7 @@ void
 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer);
 /* pass very large shader key data with extra_data */
 struct zink_shader_object
-zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data);
+zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg);
 struct zink_shader_object
 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs);
 struct zink_shader *
@ -85,9 +85,9 @@ void
 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader);

 struct zink_shader_object
-zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj);
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg);
 struct zink_shader_object
-zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices);
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg);
 struct zink_shader *
 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret);

--- a/src/gallium/drivers/zink/zink_draw.cpp
+++ b/src/gallium/drivers/zink/zink_draw.cpp
@ -272,13 +272,25 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
 {
   VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
   const struct zink_screen *screen = zink_screen(ctx->base.screen);
-   bool shaders_changed = ctx->gfx_dirty;
+   bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages;
   if (screen->optimal_keys && !ctx->is_generated_gs_bound)
      zink_gfx_program_update_optimal(ctx);
   else
      zink_gfx_program_update(ctx);
   bool pipeline_changed = false;
-   if (ctx->curr_program->base.uses_shobj) {
+   VkPipeline pipeline = VK_NULL_HANDLE;
+   if (!ctx->curr_program->base.uses_shobj) {
+      if (screen->info.have_EXT_graphics_pipeline_library)
+         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
+      else
+         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
+   }
+   if (pipeline) {
+      pipeline_changed = prev_pipeline != pipeline;
+      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
+         VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+      ctx->shobj_draw = false;
+   } else {
      if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
         VkShaderStageFlagBits stages[] = {
            VK_SHADER_STAGE_VERTEX_BIT,
@ -290,19 +302,8 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
         /* always rebind all stages */
         VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
         VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
-         pipeline_changed = false;
      }
      ctx->shobj_draw = true;
-   } else {
-      VkPipeline pipeline;
-      if (screen->info.have_EXT_graphics_pipeline_library)
-         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
-      else
-         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
-      pipeline_changed = prev_pipeline != pipeline;
-      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
-         VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
-      ctx->shobj_draw = false;
   }
   return pipeline_changed;
 }
--- a/src/gallium/drivers/zink/zink_pipeline.c
+++ b/src/gallium/drivers/zink/zink_pipeline.c
@ -26,6 +26,7 @@
 #include "zink_pipeline.h"

 #include "zink_compiler.h"
+#include "nir_to_spirv/nir_to_spirv.h"
 #include "zink_context.h"
 #include "zink_program.h"
 #include "zink_render_pass.h"
@ -375,6 +376,7 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
   }

   VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT];
+   VkShaderModuleCreateInfo smci[ZINK_GFX_SHADER_COUNT] = {0};
   uint32_t num_stages = 0;
   for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
      if (!prog->shaders[i])
@ -383,8 +385,15 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
      VkPipelineShaderStageCreateInfo stage = {0};
      stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
      stage.stage = mesa_to_vk_shader_stage(i);
-      stage.module = objs[i].mod;
      stage.pName = "main";
+      if (objs[i].mod) {
+         stage.module = objs[i].mod;
+      } else {
+         smci[i].sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+         stage.pNext = &smci[i];
+         smci[i].codeSize = objs[i].spirv->num_words * sizeof(uint32_t);
+         smci[i].pCode = objs[i].spirv->words;
+      }
      shader_stages[num_stages++] = stage;
   }
   assert(num_stages > 0);
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@ -149,15 +149,15 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
   unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx); //TODO async
-      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices);
+      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
-      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage]);
+      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
-   zm->shobj = false;
+   zm->shobj = prog->base.uses_shobj;
   zm->num_uniforms = inline_size;
   if (!is_nongenerated_tcs) {
      zm->key_size = key->size;
@ -267,16 +267,16 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
         struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
         patch_vertices = tcs->patch_vertices;
      }
-      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices);
+      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
-      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
-                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL);
+      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
+                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
-   zm->shobj = false;
+   zm->shobj = prog->base.uses_shobj;
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   if (key && !is_nongenerated_tcs) {
@ -396,6 +396,7 @@ update_gfx_shader_modules(struct zink_context *ctx,
      hash_changed = true;
      default_variants &= zm->default_variant;
      prog->objs[i] = zm->obj;
+      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (has_inline) {
         if (zm->num_uniforms)
@ -436,7 +437,8 @@ generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *scree
                                                                     inline_size, nonseamless_size,
                                                                     screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
      state->modules[i] = zm->obj.mod;
-      prog->objs[i] = zm->obj ;
+      prog->objs[i] = zm->obj;
+      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (zm->num_uniforms)
         prog->inline_variants |= BITFIELD_BIT(i);
@ -464,6 +466,7 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree

      struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
      prog->objs[i] = zm->obj;
+      prog->objects[i] = zm->obj.obj;
   }

   p_atomic_dec(&prog->base.reference.count);
@ -659,6 +662,7 @@ update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_progr

   bool changed = prog->objs[pstage].mod != zm->obj.mod;
   prog->objs[pstage] = zm->obj;
+   prog->objects[pstage] = zm->obj.obj;
   return changed;
 }

@ -788,13 +792,29 @@ optimized_compile_job(void *data, void *gdata, int thread_index)
   }
 }

+static void
+optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
+{
+   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
+   struct zink_screen *screen = gdata;
+
+   struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
+   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+      objs[i].mod = VK_NULL_HANDLE;
+      objs[i].spirv = pc_entry->shobjs[i].spirv;
+   }
+   pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
+   /* no unoptimized_pipeline dance */
+}
+
 void
 zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
 {
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driver_workarounds.disable_optimized_compile)
      return;
-   util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
+   util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
+                      pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
 }

 static void
@ -853,7 +873,7 @@ update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *c
         return;
      }
      zm->shobj = false;
-      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL);
+      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
      if (!zm->obj.spirv) {
         FREE(zm);
         return;
@ -1303,7 +1323,7 @@ precompile_compute_job(void *data, void *gdata, int thread_index)
   comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
   assert(comp->module);
   comp->module->shobj = false;
-   comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL);
+   comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
   /* comp->nir will be freed by zink_shader_compile */
   comp->nir = NULL;
   assert(comp->module->obj.spirv);
@ -2033,9 +2053,11 @@ precompile_job(void *data, void *gdata, int thread_index)
   state.optimal_key = state.shader_keys_optimal.key.val;
   generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
   zink_screen_get_pipeline_cache(screen, &prog->base, true);
-   simple_mtx_lock(&prog->libs->lock);
-   zink_create_pipeline_lib(screen, prog, &state);
-   simple_mtx_unlock(&prog->libs->lock);
+   if (!screen->info.have_EXT_shader_object) {
+      simple_mtx_lock(&prog->libs->lock);
+      zink_create_pipeline_lib(screen, prog, &state);
+      simple_mtx_unlock(&prog->libs->lock);
+   }
   zink_screen_update_pipeline_cache(screen, &prog->base, true);
 }

@ -2105,6 +2127,8 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
                                                     shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
      print_pipeline_stats(screen, pipeline);
   } else {
+      if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
+         prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
      util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
   }
 }
@ -2207,7 +2231,8 @@ zink_program_init(struct zink_context *ctx)

   STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));

-   if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library || zink_debug & ZINK_DEBUG_SHADERDB)
+   struct zink_screen *screen = zink_screen(ctx->base.screen);
+   if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
      ctx->base.link_shader = zink_link_gfx_shader;
 }

--- a/src/gallium/drivers/zink/zink_program_state.hpp
+++ b/src/gallium/drivers/zink/zink_program_state.hpp
@ -186,7 +186,10 @@ zink_get_gfx_pipeline(struct zink_context *ctx,
      /* init the optimized background compile fence */
      util_queue_fence_init(&pc_entry->fence);
      entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, pc_entry);
-      if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) {
+      if (prog->base.uses_shobj && !prog->is_separable) {
+         memcpy(pc_entry->shobjs, prog->objs, sizeof(prog->objs));
+         zink_gfx_program_compile_queue(ctx, pc_entry);
+      } else if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) {
         /* this is the graphics pipeline library path: find/construct all partial pipelines */
         simple_mtx_lock(&prog->libs->lock);
         struct set_entry *he = _mesa_set_search(&prog->libs->libs, &ctx->gfx_pipeline_state.optimal_key);
--- a/src/gallium/drivers/zink/zink_types.h
+++ b/src/gallium/drivers/zink/zink_types.h
@ -1028,6 +1028,7 @@ struct zink_gfx_pipeline_cache_entry {
         struct zink_gfx_output_key *okey;
         VkPipeline unoptimized_pipeline;
      } gpl;
+      struct zink_shader_object shobjs[ZINK_GFX_SHADER_COUNT];
   };
 };