tu: scalarize IO before linking

This allows nir_link_opt_varyings, nir_remove_unused_varyings and
nir_compact_varyings to find a lot more optimization opportunities.

The implementation has been shamelessly copied, with some minor tweaks,
from radv_link_shaders.

Note that the regression in "Early Preamble" is caused by more texture
operations becoming uniform and being hoisted to the preamble (where
they need GPRs).

Totals from 72221 (43.88% of 164575) affected shaders:
MaxWaves: 924390 -> 929534 (+0.56%); split: +0.62%, -0.06%
Instrs: 29657203 -> 29265425 (-1.32%); split: -1.63%, +0.31%
CodeSize: 61509010 -> 61032290 (-0.78%); split: -1.46%, +0.68%
NOPs: 4810811 -> 4799957 (-0.23%); split: -2.49%, +2.27%
MOVs: 923221 -> 830062 (-10.09%); split: -14.80%, +4.71%
Full: 949533 -> 933312 (-1.71%); split: -1.82%, +0.11%
(ss): 685957 -> 678810 (-1.04%); split: -3.68%, +2.63%
(sy): 326800 -> 324295 (-0.77%); split: -2.56%, +1.79%
(ss)-stall: 2710956 -> 2682550 (-1.05%); split: -4.19%, +3.15%
(sy)-stall: 9480654 -> 9332777 (-1.56%); split: -4.39%, +2.83%
STPs: 5907 -> 5885 (-0.37%)
LDPs: 2622 -> 2596 (-0.99%)
Preamble Instrs: 6728019 -> 6671785 (-0.84%); split: -1.75%, +0.92%
Early Preamble: 52865 -> 52319 (-1.03%); split: +0.26%, -1.30%
Cat0: 5280863 -> 5268118 (-0.24%); split: -2.33%, +2.08%
Cat1: 1385055 -> 1271076 (-8.23%); split: -11.33%, +3.10%
Cat2: 11333273 -> 11194153 (-1.23%); split: -1.25%, +0.02%
Cat3: 8735603 -> 8618710 (-1.34%); split: -1.34%, +0.00%
Cat4: 958143 -> 952511 (-0.59%)
Cat5: 840520 -> 836190 (-0.52%); split: -0.53%, +0.02%
Cat6: 242192 -> 232244 (-4.11%)
Cat7: 881554 -> 892423 (+1.23%); split: -1.25%, +2.48%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34784>
This commit is contained in:
Job Noorman 2025-05-08 10:07:17 +02:00 committed by Marge Bot
parent 6a57bfb004
commit b038cb3df1

View file

@ -2766,6 +2766,45 @@ tu_shader_create(struct tu_device *dev,
return VK_SUCCESS;
}
/* Scalarize the I/O variables selected by `mask` and, when that changed the
 * shader, run a round of cleanup passes followed by dead-variable removal so
 * that the varying-linking passes run afterwards have more to work with.
 */
static void
lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
{
   bool scalarized = false;
   NIR_PASS(scalarized, nir, nir_lower_io_to_scalar_early, mask);

   /* The lowering left the shader untouched: no cleanup is required. */
   if (!scalarized) {
      return;
   }

   const bool lowers_outputs = (mask & nir_var_shader_out) != 0;

   /* Tidy up the code emitted by the lowering before hunting for dead
    * variables.
    */
   NIR_PASS(_, nir, nir_copy_prop);

   if (lowers_outputs) {
      /* nir_link_opt_varyings's constant propagation wants swizzled movs of
       * load_const to be folded first.
       */
      NIR_PASS(_, nir, nir_opt_constant_folding);

      /* nir_link_opt_varyings's duplicate input optimization relies on CSE. */
      NIR_PASS(_, nir, nir_opt_cse);
   }

   /* Some shaders contain useless copies to/from an output. Propagating
    * copies through variables helps turn those outputs into dead ones, which
    * in turn makes the later compaction more effective.
    *
    * This has already run earlier, but may not have succeeded back then
    * because the outputs were still vectors.
    */
   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
   NIR_PASS(_, nir, nir_opt_dce);

   /* Only install the nir_vk_is_not_xfb_output filter when outputs are part
    * of the mask (presumably to keep transform feedback outputs alive --
    * confirm against the helper); otherwise no filter is used.
    */
   nir_remove_dead_variables_options dead_var_opts = {
      .can_remove_var = NULL,
   };
   if (lowers_outputs) {
      dead_var_opts.can_remove_var = nir_vk_is_not_xfb_output;
   }

   NIR_PASS(_, nir, nir_remove_dead_variables, mask, &dead_var_opts);
}
static void
tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
{
@ -2781,6 +2820,9 @@ tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
continue;
}
lower_io_to_scalar_early(producer, nir_var_shader_out);
lower_io_to_scalar_early(consumer, nir_var_shader_in);
if (nir_link_opt_varyings(producer, consumer)) {
NIR_PASS(_, consumer, nir_opt_constant_folding);
NIR_PASS(_, consumer, nir_opt_algebraic);
@ -2809,6 +2851,8 @@ tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
nir_lower_global_vars_to_local(consumer);
}
NIR_PASS(_, producer, nir_lower_io_to_vector, nir_var_shader_out);
NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
consumer = producer;
}