radv: Synchronize shader dumping

When RADV_DEBUG=shaders is set, printing e.g. different NIR shaders from
different threads at the same time makes the output unreadable. Use a mutex
to synchronize shader dumping so that all shaders get printed in one piece.

Since we're writing everything to a file or terminal anyway, the
performance impact of forcing single-threaded compilation is negligible.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25215>
This commit is contained in:
Konstantin Seurer 2024-08-02 10:47:07 +02:00 committed by Marge Bot
parent d6d8bb8657
commit d3ff76d27b
5 changed files with 52 additions and 10 deletions

View file

@ -352,6 +352,8 @@ radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationC
vk_instance_add_driver_trace_modes(&instance->vk, trace_options);
radv_handle_legacy_sqtt_trigger(&instance->vk);
simple_mtx_init(&instance->shader_dump_mtx, mtx_plain);
instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);
instance->profile_pstate = radv_parse_pstate(debug_get_option("RADV_PROFILE_PSTATE", "peak"));
@ -389,6 +391,8 @@ radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocat
VG(VALGRIND_DESTROY_MEMPOOL(instance));
simple_mtx_destroy(&instance->shader_dump_mtx);
driDestroyOptionCache(&instance->drirc.options);
driDestroyOptionInfo(&instance->drirc.available_options);

View file

@ -11,6 +11,7 @@
#ifndef RADV_INSTANCE_H
#define RADV_INSTANCE_H
#include "util/simple_mtx.h"
#include "util/xmlconfig.h"
#include "radv_radeon_winsys.h"
#include "vk_instance.h"
@ -37,6 +38,8 @@ struct radv_instance {
VkAllocationCallbacks alloc;
simple_mtx_t shader_dump_mtx;
uint64_t debug_flags;
uint64_t perftest_flags;
enum radeon_ctx_pstate profile_pstate;

View file

@ -100,6 +100,9 @@ radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache, str
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
struct radv_shader_binary **cs_binary)
{
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
struct radv_shader *cs_shader;
/* Compile SPIR-V shader to NIR. */
@ -123,12 +126,14 @@ radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache, str
/* Postprocess NIR. */
radv_postprocess_nir(device, NULL, cs_stage);
if (radv_can_dump_shader(device, cs_stage->nir, false))
nir_print_shader(cs_stage->nir, stderr);
/* Compile NIR shader to AMD assembly. */
bool dump_shader = radv_can_dump_shader(device, cs_stage->nir, false);
if (dump_shader) {
simple_mtx_lock(&instance->shader_dump_mtx);
nir_print_shader(cs_stage->nir, stderr);
}
/* Compile NIR shader to AMD assembly. */
*cs_binary =
radv_shader_nir_to_asm(device, cs_stage, &cs_stage->nir, 1, NULL, keep_executable_info, keep_statistic_info);
@ -137,6 +142,9 @@ radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache, str
radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, *cs_binary, cs_shader, &cs_stage->nir, 1,
&cs_stage->info);
if (dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
if (keep_executable_info && cs_stage->spirv.size) {
cs_shader->spirv = malloc(cs_stage->spirv.size);
memcpy(cs_shader->spirv, cs_stage->spirv.data, cs_stage->spirv.size);

View file

@ -2059,6 +2059,8 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
struct radv_shader_binary **gs_copy_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
const struct radv_shader_info *gs_info = &gs_stage->info;
ac_nir_gs_output_info output_info = {
.streams = gs_info->gs.output_streams,
@ -2101,6 +2103,9 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
struct radv_graphics_pipeline_key key = {0};
bool dump_shader = radv_can_dump_shader(device, nir, true);
if (dump_shader)
simple_mtx_lock(&instance->shader_dump_mtx);
*gs_copy_binary = radv_shader_nir_to_asm(device, &gs_copy_stage, &nir, 1, &key.gfx_state, keep_executable_info,
keep_statistic_info);
struct radv_shader *copy_shader =
@ -2108,6 +2113,10 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
if (copy_shader)
radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, *gs_copy_binary, copy_shader, &nir, 1,
&gs_copy_stage.info);
if (dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
return copy_shader;
}
@ -2120,6 +2129,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
struct radv_shader_binary **gs_copy_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
if (!(active_nir_stages & (1 << s)))
@ -2150,12 +2160,21 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
bool dump_shader = radv_can_dump_shader(device, nir_shaders[0], false);
if (dump_shader) {
simple_mtx_lock(&instance->shader_dump_mtx);
for (uint32_t i = 0; i < shader_count; i++)
nir_print_shader(nir_shaders[i], stderr);
}
binaries[s] = radv_shader_nir_to_asm(device, &stages[s], nir_shaders, shader_count, gfx_state,
keep_executable_info, keep_statistic_info);
shaders[s] = radv_shader_create(device, cache, binaries[s], keep_executable_info || dump_shader);
radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, binaries[s], shaders[s], nir_shaders,
shader_count, &stages[s].info);
if (dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) {
*gs_copy_shader = radv_create_gs_copy_shader(device, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state,
keep_executable_info, keep_statistic_info, gs_copy_binary);
@ -2505,9 +2524,6 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
radv_postprocess_nir(device, gfx_state, &stages[i]);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
if (radv_can_dump_shader(device, stages[i].nir, false))
nir_print_shader(stages[i].nir, stderr);
}
/* Compile NIR shaders to AMD assembly. */

View file

@ -351,6 +351,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
{
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
struct radv_shader_binary *binary;
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
@ -418,15 +420,18 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
if (stage_info)
radv_gather_unused_args(stage_info, shaders[i]);
if (radv_can_dump_shader(device, temp_stage.nir, false))
nir_print_shader(temp_stage.nir, stderr);
}
bool dump_shader = radv_can_dump_shader(device, shaders[0], false);
bool replayable =
pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
if (dump_shader) {
simple_mtx_lock(&instance->shader_dump_mtx);
for (uint32_t i = 0; i < num_shaders; i++)
nir_print_shader(shaders[i], stderr);
}
/* Compile NIR shader to AMD assembly. */
binary =
radv_shader_nir_to_asm(device, stage, shaders, num_shaders, NULL, keep_executable_info, keep_statistic_info);
@ -434,6 +439,9 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
if (replay_block || replayable) {
VkResult result = radv_shader_create_uncached(device, binary, replayable, replay_block, &shader);
if (result != VK_SUCCESS) {
if (dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
free(binary);
return result;
}
@ -451,6 +459,9 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
}
}
if (dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
free(binary);
*out_shader = shader;