zink: add partial async shader compile support

When compute shaders can be precompiled, they can be precompiled asynchronously,
which allows implementing the parallel shader compile hooks.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18197>
This commit is contained in:
Mike Blumenkrantz 2022-08-12 11:34:29 -04:00 committed by Marge Bot
parent eb9ec4baa7
commit a327ff90d2
4 changed files with 27 additions and 3 deletions

View file

@ -301,7 +301,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_ES3_2_compatibility DONE (i965/gen8+, radeonsi, virgl, zink)
GL_ARB_fragment_shader_interlock DONE (i965, zink)
GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe, zink, d3d12)
GL_ARB_parallel_shader_compile DONE (freedreno, iris, radeonsi, etnaviv)
GL_ARB_parallel_shader_compile DONE (freedreno, iris, radeonsi, etnaviv, zink)
GL_ARB_post_depth_coverage DONE (i965, nvc0, radeonsi, llvmpipe, zink)
GL_ARB_robustness_isolation not started
GL_ARB_sample_locations DONE (nvc0, zink)

View file

@ -733,8 +733,11 @@ equals_compute_pipeline_state_local_size(const void *a, const void *b)
}
static void
precompile_compute(struct zink_compute_program *comp, struct zink_screen *screen)
precompile_compute_job(void *data, void *gdata, int thread_index)
{
struct zink_compute_program *comp = data;
struct zink_screen *screen = gdata;
zink_screen_get_pipeline_cache(screen, &comp->base, true);
comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
if (comp->base_pipeline)
@ -773,7 +776,8 @@ create_compute_program(struct zink_context *ctx, nir_shader *nir)
if (comp->use_local_size || (!screen->info.have_EXT_non_seamless_cube_map && comp->shader->has_cubes)) {
zink_screen_get_pipeline_cache(screen, &comp->base, false);
} else {
precompile_compute(comp, screen);
comp->base.can_precompile = true;
util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence, precompile_compute_job, NULL, 0);
}
return comp;

View file

@ -163,6 +163,23 @@ zink_get_device_node_mask(struct pipe_screen *pscreen)
}
}
/* pipe_screen::set_max_shader_compiler_threads hook: resize the screen's
 * pipeline-cache job queue to the thread count requested by the state tracker.
 */
static void
zink_set_max_shader_compiler_threads(struct pipe_screen *pscreen, unsigned max_threads)
{
struct zink_screen *screen = zink_screen(pscreen);
util_queue_adjust_num_threads(&screen->cache_get_thread, max_threads);
}
/* pipe_screen::is_parallel_shader_compilation_finished hook.
 * Only compute shaders are precompiled asynchronously so far; every other
 * shader stage is reported as finished immediately.
 */
static bool
zink_is_parallel_shader_compilation_finished(struct pipe_screen *screen, void *shader, enum pipe_shader_type shader_type)
{
/* not supported yet */
if (shader_type != MESA_SHADER_COMPUTE)
return true;
struct zink_program *pg = shader;
/* a program that was never queued for async precompile has no pending job,
 * so its fence state is irrelevant; otherwise wait on the cache fence that
 * util_queue_add_job() signals when the precompile job completes
 */
return !pg->can_precompile || util_queue_fence_is_signalled(&pg->cache_fence);
}
static VkDeviceSize
get_video_mem(struct zink_screen *screen)
{
@ -2261,6 +2278,8 @@ zink_internal_create_screen(const struct pipe_screen_config *config)
screen->base.get_device_luid = zink_get_device_luid;
screen->base.get_device_node_mask = zink_get_device_node_mask;
}
screen->base.set_max_shader_compiler_threads = zink_set_max_shader_compiler_threads;
screen->base.is_parallel_shader_compilation_finished = zink_is_parallel_shader_compilation_finished;
screen->base.get_vendor = zink_get_vendor;
screen->base.get_device_vendor = zink_get_device_vendor;
screen->base.get_compute_param = zink_get_compute_param;

View file

@ -731,6 +731,7 @@ struct zink_program {
size_t pipeline_cache_size;
struct zink_batch_usage *batch_uses;
bool is_compute;
bool can_precompile;
struct zink_program_descriptor_data dd;