tu: scalarize IO before linking

This allows nir_link_opt_varyings, nir_remove_unused_varyings and
nir_compact_varyings to find a lot more optimization opportunities.

The implementation has been shamelessly copied, with some minor tweaks,
from radv_link_shaders.

Note that the regression in "Early Preamble" is caused by more texture
operations becoming uniform and being hoisted to the preamble (where they
need GPRs).

Totals from 72221 (43.88% of 164575) affected shaders:
MaxWaves: 924390 -> 929534 (+0.56%); split: +0.62%, -0.06%
Instrs: 29657203 -> 29265425 (-1.32%); split: -1.63%, +0.31%
CodeSize: 61509010 -> 61032290 (-0.78%); split: -1.46%, +0.68%
NOPs: 4810811 -> 4799957 (-0.23%); split: -2.49%, +2.27%
MOVs: 923221 -> 830062 (-10.09%); split: -14.80%, +4.71%
Full: 949533 -> 933312 (-1.71%); split: -1.82%, +0.11%
(ss): 685957 -> 678810 (-1.04%); split: -3.68%, +2.63%
(sy): 326800 -> 324295 (-0.77%); split: -2.56%, +1.79%
(ss)-stall: 2710956 -> 2682550 (-1.05%); split: -4.19%, +3.15%
(sy)-stall: 9480654 -> 9332777 (-1.56%); split: -4.39%, +2.83%
STPs: 5907 -> 5885 (-0.37%)
LDPs: 2622 -> 2596 (-0.99%)
Preamble Instrs: 6728019 -> 6671785 (-0.84%); split: -1.75%, +0.92%
Early Preamble: 52865 -> 52319 (-1.03%); split: +0.26%, -1.30%
Cat0: 5280863 -> 5268118 (-0.24%); split: -2.33%, +2.08%
Cat1: 1385055 -> 1271076 (-8.23%); split: -11.33%, +3.10%
Cat2: 11333273 -> 11194153 (-1.23%); split: -1.25%, +0.02%
Cat3: 8735603 -> 8618710 (-1.34%); split: -1.34%, +0.00%
Cat4: 958143 -> 952511 (-0.59%)
Cat5: 840520 -> 836190 (-0.52%); split: -0.53%, +0.02%
Cat6: 242192 -> 232244 (-4.11%)
Cat7: 881554 -> 892423 (+1.23%); split: -1.25%, +2.48%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34784>
2025-12-24 21:50:12 +01:00 · 2025-05-08 10:07:17 +02:00 · 2025-05-08 10:07:17 +02:00 · b038cb3df1
commit b038cb3df1
parent 6a57bfb004
1 changed files with 44 additions and 0 deletions
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -2766,6 +2766,45 @@ tu_shader_create(struct tu_device *dev,
   return VK_SUCCESS;
 }

+static void
+lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
+{
+   bool progress = false;
+   NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask);
+
+   if (progress) {
+      /* Clean up the newly scalarized code, then remove dead vars. */
+      NIR_PASS(_, nir, nir_copy_prop);
+
+      if (mask & nir_var_shader_out) {
+         /* Fold swizzled movs of load_const for the benefit of
+          * nir_link_opt_varyings's constant propagation.
+          */
+         NIR_PASS(_, nir, nir_opt_constant_folding);
+
+         /* CSE for nir_link_opt_varyings's duplicate-input optimization. */
+         NIR_PASS(_, nir, nir_opt_cse);
+      }
+
+      /* Run copy-propagation to help remove dead output variables (some
+       * shaders have useless copies to/from an output), so that the later
+       * compaction is more effective.
+       *
+       * This will have been done earlier, but it might not have worked
+       * because the outputs were still vectors.
+       */
+      NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+
+      NIR_PASS(_, nir, nir_opt_dce);
+
+      const nir_remove_dead_variables_options var_opts = {
+         .can_remove_var =
+            (mask & nir_var_shader_out) ? nir_vk_is_not_xfb_output : NULL,
+      };
+      NIR_PASS(_, nir, nir_remove_dead_variables, mask, &var_opts);
+   }
+}
+
 static void
 tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
 {
@@ -2781,6 +2820,9 @@ tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
         continue;
      }

+      lower_io_to_scalar_early(producer, nir_var_shader_out);
+      lower_io_to_scalar_early(consumer, nir_var_shader_in);
+
      if (nir_link_opt_varyings(producer, consumer)) {
         NIR_PASS(_, consumer, nir_opt_constant_folding);
         NIR_PASS(_, consumer, nir_opt_algebraic);
@@ -2809,6 +2851,8 @@ tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
         nir_lower_global_vars_to_local(consumer);
      }

+      NIR_PASS(_, producer, nir_lower_io_to_vector, nir_var_shader_out);
+      NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
      consumer = producer;
   }