tu: use nir_opt_varyings_bulk for linking

Replace the use of nir_link_opt_varyings/nir_compact_varyings for
linking with the new nir_opt_varyings linker using the
nir_opt_varyings_bulk helper.

This moves all the NIR lowering up to nir_lower_io (tu_lower_nir) to the
pre-linking stage since nir_opt_varyings expects lowered IO.

Totals from 38233 (21.69% of 176258) affected shaders:
MaxWaves: 522636 -> 522578 (-0.01%); split: +0.14%, -0.15%
Instrs: 15111014 -> 15062812 (-0.32%); split: -0.71%, +0.39%
CodeSize: 31555448 -> 31530676 (-0.08%); split: -0.70%, +0.62%
NOPs: 2605163 -> 2582030 (-0.89%); split: -2.38%, +1.49%
MOVs: 519056 -> 511167 (-1.52%); split: -4.88%, +3.36%
COVs: 244091 -> 243317 (-0.32%); split: -0.55%, +0.23%
Full: 463796 -> 463307 (-0.11%); split: -0.47%, +0.36%
(ss): 390558 -> 386374 (-1.07%); split: -3.07%, +2.00%
(sy): 180298 -> 179347 (-0.53%); split: -1.55%, +1.02%
(ss)-stall: 1485337 -> 1473362 (-0.81%); split: -3.92%, +3.11%
(sy)-stall: 5441818 -> 5375690 (-1.22%); split: -2.99%, +1.78%
Preamble Instrs: 3707325 -> 3724339 (+0.46%); split: -0.38%, +0.84%
Early Preamble: 29397 -> 29392 (-0.02%); split: +0.10%, -0.12%
Cat0: 2883908 -> 2860585 (-0.81%); split: -2.16%, +1.35%
Cat1: 765447 -> 757066 (-1.09%); split: -3.46%, +2.36%
Cat2: 5664380 -> 5663562 (-0.01%); split: -0.51%, +0.49%
Cat3: 4393358 -> 4386474 (-0.16%); split: -0.27%, +0.12%
Cat4: 443624 -> 443546 (-0.02%); split: -0.03%, +0.01%
Cat5: 427389 -> 427239 (-0.04%); split: -0.27%, +0.24%
Cat6: 173632 -> 164362 (-5.34%); split: -5.36%, +0.02%
Cat7: 359276 -> 359978 (+0.20%); split: -1.33%, +1.53%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40651>
2026-05-07 02:48:06 +02:00 · 2026-04-03 08:18:08 +02:00 · 2026-04-03 08:18:08 +02:00 · 893d3caf7b
commit 893d3caf7b
parent 99713d0c53
3 changed files with 46 additions and 97 deletions
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@ -4971,7 +4971,9 @@ tu_compute_pipeline_create(VkDevice device,
      nir_initial_disasm = executable_info ?
         nir_shader_as_str(nir, pipeline->base.executables_mem_ctx) : NULL;

-      result = tu_shader_create(dev, &shader, nir, &key, &ir3_key,
+      struct tu_shader_info info = {};
+      tu_lower_nir(dev, nir, &key, &info);
+      result = tu_shader_create(dev, &shader, nir, &key, &info, &ir3_key,
                                pipeline_blake3, sizeof(pipeline_blake3), layout,
                                executable_info);
      if (!shader) {
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@ -3083,6 +3083,7 @@ tu_shader_create(struct tu_device *dev,
                 struct tu_shader **shader_out,
                 nir_shader *nir,
                 const struct tu_shader_key *key,
+                 const struct tu_shader_info *info,
                 const struct ir3_shader_key *ir3_key,
                 const void *key_data,
                 size_t key_size,
@ -3094,10 +3095,7 @@ tu_shader_create(struct tu_device *dev,
   if (!shader)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

-   struct tu_shader_info info = {};
-   tu_lower_nir(dev, nir, key, &info);
-
-   shader->per_layer_viewport = info.per_layer_viewport;
+   shader->per_layer_viewport = info->per_layer_viewport;

   if (nir->info.stage == MESA_SHADER_FRAGMENT &&
       key->fdm_per_layer) {
@ -3232,108 +3230,42 @@ tu_shader_create(struct tu_device *dev,
 }

 static void
-lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
+link_opts(nir_shader *shader, void *data)
 {
-   bool progress = false;
-   NIR_PASS(progress, nir, nir_lower_io_vars_to_scalar, mask);
+   struct ir3_compiler *compiler = static_cast<struct ir3_compiler *>(data);

-   if (progress) {
-      /* Optimize the new vector code and then remove dead vars. */
-      NIR_PASS(_, nir, nir_opt_copy_prop);
-
-      if (mask & nir_var_shader_out) {
-         /* Optimize swizzled movs of load_const for nir_link_opt_varyings's
-          * constant propagation.
-          */
-         NIR_PASS(_, nir, nir_opt_constant_folding);
-
-         /* For nir_link_opt_varyings's duplicate input opt. */
-         NIR_PASS(_, nir, nir_opt_cse);
-      }
-
-      /* Run copy-propagation to help remove dead output variables (some
-       * shaders have useless copies to/from an output), so compaction later
-       * will be more effective.
-       *
-       * This will have been done earlier but it might not have worked because
-       * the outputs were vector.
-       */
-      NIR_PASS(_, nir, nir_opt_copy_prop_vars);
-
-      /* This must be called before nir_link_opt_varyings() and after
-       * nir_opt_copy_prop_vars(), otherwise repeated (scalarized) stores in the
-       * last block will propagate the wrong values into the consumer.
-       */
-      NIR_PASS(_, nir, nir_opt_dead_write_vars);
-
-      NIR_PASS(_, nir, nir_opt_dce);
-
-      const nir_remove_dead_variables_options var_opts = {
-         .can_remove_var =
-            (mask & nir_var_shader_out) ? nir_vk_is_not_xfb_output : NULL,
-      };
-      NIR_PASS(_, nir, nir_remove_dead_variables, mask, &var_opts);
-   }
+   struct ir3_optimize_options optimize_options = {};
+   ir3_optimize_loop(compiler, &optimize_options, shader);
 }

 static void
-tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
+tu_link_shaders(struct tu_device *dev,
+                nir_shader **shaders,
+                unsigned shaders_count)
 {
-   nir_shader *consumer = NULL;
-   for (mesa_shader_stage stage = (mesa_shader_stage) (shaders_count - 1);
-        stage >= MESA_SHADER_VERTEX; stage = (mesa_shader_stage) (stage - 1)) {
-      if (!shaders[stage])
-         continue;
+   nir_shader *link_shaders[MESA_SHADER_STAGES] = {};
+   assert(shaders_count <= ARRAY_SIZE(link_shaders));

-      nir_shader *producer = shaders[stage];
-      if (!consumer) {
-         consumer = producer;
-         continue;
+   unsigned link_shaders_count = 0;
+
+   for (unsigned i = 0; i < shaders_count; i++) {
+      if (shaders[i]) {
+         link_shaders[link_shaders_count++] = shaders[i];
      }
-
-      lower_io_to_scalar_early(producer, nir_var_shader_out);
-      lower_io_to_scalar_early(consumer, nir_var_shader_in);
-
-      if (nir_link_opt_varyings(producer, consumer)) {
-         NIR_PASS(_, consumer, nir_opt_constant_folding);
-         NIR_PASS(_, consumer, nir_opt_algebraic);
-         NIR_PASS(_, consumer, nir_opt_dce);
-      }
-
-      const nir_remove_dead_variables_options out_var_opts = {
-         .can_remove_var = nir_vk_is_not_xfb_output,
-      };
-      NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
-               &out_var_opts);
-
-      NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
-               NULL);
-
-      bool progress = nir_remove_unused_varyings(producer, consumer);
-
-      nir_compact_varyings(producer, consumer, true);
-      if (progress) {
-         if (nir_lower_global_vars_to_local(producer)) {
-            /* Remove dead writes, which can remove input loads */
-            NIR_PASS(_, producer, nir_remove_dead_variables,
-                     nir_var_shader_temp, NULL);
-            NIR_PASS(_, producer, nir_opt_dce);
-         }
-         nir_lower_global_vars_to_local(consumer);
-      }
-
-      NIR_PASS(_, producer, nir_opt_vectorize_io_vars, nir_var_shader_out);
-      NIR_PASS(_, consumer, nir_opt_vectorize_io_vars, nir_var_shader_in);
-      consumer = producer;
   }

-   /* Gather info after linking so that we can fill out the ir3 shader key.
+   nir_opt_varyings_bulk(link_shaders, link_shaders_count, true, UINT32_MAX,
+                         UINT32_MAX, link_opts, dev->compiler);
+
+   /* We have to make sure nir_recompute_io_bases is called at least once so
+    * that num_inputs/num_outputs is correctly set for all shaders.
+    * nir_opt_varyings_bulk will do this for us when linking multiple shaders
+    * but not when there is only a single shader. Call it manually in that
+    * case.
    */
-   for (mesa_shader_stage stage = MESA_SHADER_VERTEX;
-        stage <= MESA_SHADER_FRAGMENT; stage = (mesa_shader_stage) (stage + 1)) {
-      if (shaders[stage])
-         nir_shader_gather_info(shaders[stage],
-                                nir_shader_get_entrypoint(shaders[stage]));
+   if (link_shaders_count == 1) {
+      NIR_PASS(_, link_shaders[0], nir_recompute_io_bases,
+               nir_var_shader_in | nir_var_shader_out);
   }
 }

@ -3370,6 +3302,7 @@ tu_compile_shaders(struct tu_device *device,
                   VkPipelineCreationFeedback *stage_feedbacks)
 {
   struct ir3_shader_key ir3_key = {};
+   struct tu_shader_info info[MESA_SHADER_STAGES] = {};
   VkResult result = VK_SUCCESS;
   void *mem_ctx = ralloc_context(NULL);

@ -3407,7 +3340,19 @@ tu_compile_shaders(struct tu_device *device,
      }
   }

-   tu_link_shaders(nir, MESA_SHADER_STAGES);
+   for (mesa_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (mesa_shader_stage) (stage + 1)) {
+      if (!nir[stage])
+         continue;
+
+      int64_t stage_start = os_time_get_nano();
+
+      tu_lower_nir(device, nir[stage], &keys[stage], &info[stage]);
+
+      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
+   }
+
+   tu_link_shaders(device, nir, MESA_SHADER_STAGES);

   if (nir_out) {
      for (mesa_shader_stage stage = MESA_SHADER_VERTEX;
@ -3482,6 +3427,7 @@ tu_compile_shaders(struct tu_device *device,

      result = tu_shader_create(device,
                                &shaders[stage], nir[stage], &keys[stage],
+                                &info[stage],
                                &ir3_key, shader_blake3, sizeof(shader_blake3),
                                layout, !!nir_initial_disasm);
      if (result != VK_SUCCESS) {
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@ -209,6 +209,7 @@ tu_shader_create(struct tu_device *dev,
                 struct tu_shader **shader_out,
                 nir_shader *nir,
                 const struct tu_shader_key *key,
+                 const struct tu_shader_info *shader_info,
                 const struct ir3_shader_key *ir3_key,
                 const void *key_data,
                 size_t key_size,