From b038cb3df1e59d334a85d765f11cc69c3e477d57 Mon Sep 17 00:00:00 2001
From: Job Noorman
Date: Thu, 8 May 2025 10:07:17 +0200
Subject: [PATCH] tu: scalarize IO before linking

This allows nir_link_opt_varyings, nir_remove_unused_varyings and
nir_compact_varyings to find a lot more optimization opportunities.

The implementation has been shamelessly copied, with some minor tweaks,
from radv_link_shaders.

Note that the regression in "Early Preamble" is caused by more texture
operations becoming uniform and being hoisted to the preamble (where
they need GPRs).

Totals from 72221 (43.88% of 164575) affected shaders:
MaxWaves: 924390 -> 929534 (+0.56%); split: +0.62%, -0.06%
Instrs: 29657203 -> 29265425 (-1.32%); split: -1.63%, +0.31%
CodeSize: 61509010 -> 61032290 (-0.78%); split: -1.46%, +0.68%
NOPs: 4810811 -> 4799957 (-0.23%); split: -2.49%, +2.27%
MOVs: 923221 -> 830062 (-10.09%); split: -14.80%, +4.71%
Full: 949533 -> 933312 (-1.71%); split: -1.82%, +0.11%
(ss): 685957 -> 678810 (-1.04%); split: -3.68%, +2.63%
(sy): 326800 -> 324295 (-0.77%); split: -2.56%, +1.79%
(ss)-stall: 2710956 -> 2682550 (-1.05%); split: -4.19%, +3.15%
(sy)-stall: 9480654 -> 9332777 (-1.56%); split: -4.39%, +2.83%
STPs: 5907 -> 5885 (-0.37%)
LDPs: 2622 -> 2596 (-0.99%)
Preamble Instrs: 6728019 -> 6671785 (-0.84%); split: -1.75%, +0.92%
Early Preamble: 52865 -> 52319 (-1.03%); split: +0.26%, -1.30%
Cat0: 5280863 -> 5268118 (-0.24%); split: -2.33%, +2.08%
Cat1: 1385055 -> 1271076 (-8.23%); split: -11.33%, +3.10%
Cat2: 11333273 -> 11194153 (-1.23%); split: -1.25%, +0.02%
Cat3: 8735603 -> 8618710 (-1.34%); split: -1.34%, +0.00%
Cat4: 958143 -> 952511 (-0.59%)
Cat5: 840520 -> 836190 (-0.52%); split: -0.53%, +0.02%
Cat6: 242192 -> 232244 (-4.11%)
Cat7: 881554 -> 892423 (+1.23%); split: -1.25%, +2.48%

Signed-off-by: Job Noorman
Part-of:
---
 src/freedreno/vulkan/tu_shader.cc | 44 +++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 75f9a592fe9..25a9db3e46c 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -2766,6 +2766,45 @@ tu_shader_create(struct tu_device *dev,
    return VK_SUCCESS;
 }
 
+static void
+lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
+{
+   bool progress = false;
+   NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask);
+
+   if (progress) {
+      /* Optimize the new vector code and then remove dead vars. */
+      NIR_PASS(_, nir, nir_copy_prop);
+
+      if (mask & nir_var_shader_out) {
+         /* Optimize swizzled movs of load_const for nir_link_opt_varyings's
+          * constant propagation.
+          */
+         NIR_PASS(_, nir, nir_opt_constant_folding);
+
+         /* For nir_link_opt_varyings's duplicate input opt. */
+         NIR_PASS(_, nir, nir_opt_cse);
+      }
+
+      /* Run copy-propagation to help remove dead output variables (some
+       * shaders have useless copies to/from an output), so compaction later
+       * will be more effective.
+       *
+       * This will have been done earlier but it might not have worked because
+       * the outputs were vector.
+       */
+      NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+
+      NIR_PASS(_, nir, nir_opt_dce);
+
+      const nir_remove_dead_variables_options var_opts = {
+         .can_remove_var =
+            (mask & nir_var_shader_out) ? nir_vk_is_not_xfb_output : NULL,
+      };
+      NIR_PASS(_, nir, nir_remove_dead_variables, mask, &var_opts);
+   }
+}
+
 static void
 tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
 {
@@ -2781,6 +2820,9 @@ tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
          continue;
       }
 
+      lower_io_to_scalar_early(producer, nir_var_shader_out);
+      lower_io_to_scalar_early(consumer, nir_var_shader_in);
+
       if (nir_link_opt_varyings(producer, consumer)) {
          NIR_PASS(_, consumer, nir_opt_constant_folding);
          NIR_PASS(_, consumer, nir_opt_algebraic);
@@ -2809,6 +2851,8 @@ tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
          nir_lower_global_vars_to_local(consumer);
       }
 
+      NIR_PASS(_, producer, nir_lower_io_to_vector, nir_var_shader_out);
+      NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
       consumer = producer;
    }