tu: use nir_opt_varyings_bulk for linking

Replace the use of nir_link_opt_varyings/nir_compact_varyings for
linking with the new nir_opt_varyings linker using the
nir_opt_varyings_bulk helper.

This moves all the NIR lowering up to and including nir_lower_io
(tu_lower_nir) to the pre-linking stage, since nir_opt_varyings expects
lowered IO.

Totals from 38233 (21.69% of 176258) affected shaders:
MaxWaves: 522636 -> 522578 (-0.01%); split: +0.14%, -0.15%
Instrs: 15111014 -> 15062812 (-0.32%); split: -0.71%, +0.39%
CodeSize: 31555448 -> 31530676 (-0.08%); split: -0.70%, +0.62%
NOPs: 2605163 -> 2582030 (-0.89%); split: -2.38%, +1.49%
MOVs: 519056 -> 511167 (-1.52%); split: -4.88%, +3.36%
COVs: 244091 -> 243317 (-0.32%); split: -0.55%, +0.23%
Full: 463796 -> 463307 (-0.11%); split: -0.47%, +0.36%
(ss): 390558 -> 386374 (-1.07%); split: -3.07%, +2.00%
(sy): 180298 -> 179347 (-0.53%); split: -1.55%, +1.02%
(ss)-stall: 1485337 -> 1473362 (-0.81%); split: -3.92%, +3.11%
(sy)-stall: 5441818 -> 5375690 (-1.22%); split: -2.99%, +1.78%
Preamble Instrs: 3707325 -> 3724339 (+0.46%); split: -0.38%, +0.84%
Early Preamble: 29397 -> 29392 (-0.02%); split: +0.10%, -0.12%
Cat0: 2883908 -> 2860585 (-0.81%); split: -2.16%, +1.35%
Cat1: 765447 -> 757066 (-1.09%); split: -3.46%, +2.36%
Cat2: 5664380 -> 5663562 (-0.01%); split: -0.51%, +0.49%
Cat3: 4393358 -> 4386474 (-0.16%); split: -0.27%, +0.12%
Cat4: 443624 -> 443546 (-0.02%); split: -0.03%, +0.01%
Cat5: 427389 -> 427239 (-0.04%); split: -0.27%, +0.24%
Cat6: 173632 -> 164362 (-5.34%); split: -5.36%, +0.02%
Cat7: 359276 -> 359978 (+0.20%); split: -1.33%, +1.53%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40651>
This commit is contained in:
Job Noorman 2026-04-03 08:18:08 +02:00
parent 99713d0c53
commit 893d3caf7b
3 changed files with 46 additions and 97 deletions

View file

@ -4971,7 +4971,9 @@ tu_compute_pipeline_create(VkDevice device,
nir_initial_disasm = executable_info ?
nir_shader_as_str(nir, pipeline->base.executables_mem_ctx) : NULL;
result = tu_shader_create(dev, &shader, nir, &key, &ir3_key,
struct tu_shader_info info = {};
tu_lower_nir(dev, nir, &key, &info);
result = tu_shader_create(dev, &shader, nir, &key, &info, &ir3_key,
pipeline_blake3, sizeof(pipeline_blake3), layout,
executable_info);
if (!shader) {

View file

@ -3083,6 +3083,7 @@ tu_shader_create(struct tu_device *dev,
struct tu_shader **shader_out,
nir_shader *nir,
const struct tu_shader_key *key,
const struct tu_shader_info *info,
const struct ir3_shader_key *ir3_key,
const void *key_data,
size_t key_size,
@ -3094,10 +3095,7 @@ tu_shader_create(struct tu_device *dev,
if (!shader)
return VK_ERROR_OUT_OF_HOST_MEMORY;
struct tu_shader_info info = {};
tu_lower_nir(dev, nir, key, &info);
shader->per_layer_viewport = info.per_layer_viewport;
shader->per_layer_viewport = info->per_layer_viewport;
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
key->fdm_per_layer) {
@ -3232,108 +3230,42 @@ tu_shader_create(struct tu_device *dev,
}
static void
lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
link_opts(nir_shader *shader, void *data)
{
bool progress = false;
NIR_PASS(progress, nir, nir_lower_io_vars_to_scalar, mask);
struct ir3_compiler *compiler = static_cast<struct ir3_compiler *>(data);
if (progress) {
/* Optimize the new vector code and then remove dead vars. */
NIR_PASS(_, nir, nir_opt_copy_prop);
if (mask & nir_var_shader_out) {
/* Optimize swizzled movs of load_const for nir_link_opt_varyings's
* constant propagation.
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
/* For nir_link_opt_varyings's duplicate input opt. */
NIR_PASS(_, nir, nir_opt_cse);
}
/* Run copy-propagation to help remove dead output variables (some
* shaders have useless copies to/from an output), so compaction later
* will be more effective.
*
* This will have been done earlier but it might not have worked because
* the outputs were vector.
*/
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
/* This must be called before nir_link_opt_varyings() and after
* nir_opt_copy_prop_vars(), otherwise repeated (scalarized) stores in the
* last block will propagate the wrong values into the consumer.
*/
NIR_PASS(_, nir, nir_opt_dead_write_vars);
NIR_PASS(_, nir, nir_opt_dce);
const nir_remove_dead_variables_options var_opts = {
.can_remove_var =
(mask & nir_var_shader_out) ? nir_vk_is_not_xfb_output : NULL,
};
NIR_PASS(_, nir, nir_remove_dead_variables, mask, &var_opts);
}
struct ir3_optimize_options optimize_options = {};
ir3_optimize_loop(compiler, &optimize_options, shader);
}
static void
tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
tu_link_shaders(struct tu_device *dev,
nir_shader **shaders,
unsigned shaders_count)
{
nir_shader *consumer = NULL;
for (mesa_shader_stage stage = (mesa_shader_stage) (shaders_count - 1);
stage >= MESA_SHADER_VERTEX; stage = (mesa_shader_stage) (stage - 1)) {
if (!shaders[stage])
continue;
nir_shader *link_shaders[MESA_SHADER_STAGES] = {};
assert(shaders_count <= ARRAY_SIZE(link_shaders));
nir_shader *producer = shaders[stage];
if (!consumer) {
consumer = producer;
continue;
unsigned link_shaders_count = 0;
for (unsigned i = 0; i < shaders_count; i++) {
if (shaders[i]) {
link_shaders[link_shaders_count++] = shaders[i];
}
lower_io_to_scalar_early(producer, nir_var_shader_out);
lower_io_to_scalar_early(consumer, nir_var_shader_in);
if (nir_link_opt_varyings(producer, consumer)) {
NIR_PASS(_, consumer, nir_opt_constant_folding);
NIR_PASS(_, consumer, nir_opt_algebraic);
NIR_PASS(_, consumer, nir_opt_dce);
}
const nir_remove_dead_variables_options out_var_opts = {
.can_remove_var = nir_vk_is_not_xfb_output,
};
NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
&out_var_opts);
NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
NULL);
bool progress = nir_remove_unused_varyings(producer, consumer);
nir_compact_varyings(producer, consumer, true);
if (progress) {
if (nir_lower_global_vars_to_local(producer)) {
/* Remove dead writes, which can remove input loads */
NIR_PASS(_, producer, nir_remove_dead_variables,
nir_var_shader_temp, NULL);
NIR_PASS(_, producer, nir_opt_dce);
}
nir_lower_global_vars_to_local(consumer);
}
NIR_PASS(_, producer, nir_opt_vectorize_io_vars, nir_var_shader_out);
NIR_PASS(_, consumer, nir_opt_vectorize_io_vars, nir_var_shader_in);
consumer = producer;
}
/* Gather info after linking so that we can fill out the ir3 shader key.
nir_opt_varyings_bulk(link_shaders, link_shaders_count, true, UINT32_MAX,
UINT32_MAX, link_opts, dev->compiler);
/* We have to make sure nir_recompute_io_bases is called at least once so
* that num_inputs/num_outputs is correctly set for all shaders.
* nir_opt_varyings_bulk will do this for us when linking multiple shaders
* but not when there is only a single shader. Call it manually in that
* case.
*/
for (mesa_shader_stage stage = MESA_SHADER_VERTEX;
stage <= MESA_SHADER_FRAGMENT; stage = (mesa_shader_stage) (stage + 1)) {
if (shaders[stage])
nir_shader_gather_info(shaders[stage],
nir_shader_get_entrypoint(shaders[stage]));
if (link_shaders_count == 1) {
NIR_PASS(_, link_shaders[0], nir_recompute_io_bases,
nir_var_shader_in | nir_var_shader_out);
}
}
@ -3370,6 +3302,7 @@ tu_compile_shaders(struct tu_device *device,
VkPipelineCreationFeedback *stage_feedbacks)
{
struct ir3_shader_key ir3_key = {};
struct tu_shader_info info[MESA_SHADER_STAGES] = {};
VkResult result = VK_SUCCESS;
void *mem_ctx = ralloc_context(NULL);
@ -3407,7 +3340,19 @@ tu_compile_shaders(struct tu_device *device,
}
}
tu_link_shaders(nir, MESA_SHADER_STAGES);
for (mesa_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
stage = (mesa_shader_stage) (stage + 1)) {
if (!nir[stage])
continue;
int64_t stage_start = os_time_get_nano();
tu_lower_nir(device, nir[stage], &keys[stage], &info[stage]);
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
}
tu_link_shaders(device, nir, MESA_SHADER_STAGES);
if (nir_out) {
for (mesa_shader_stage stage = MESA_SHADER_VERTEX;
@ -3482,6 +3427,7 @@ tu_compile_shaders(struct tu_device *device,
result = tu_shader_create(device,
&shaders[stage], nir[stage], &keys[stage],
&info[stage],
&ir3_key, shader_blake3, sizeof(shader_blake3),
layout, !!nir_initial_disasm);
if (result != VK_SUCCESS) {

View file

@ -209,6 +209,7 @@ tu_shader_create(struct tu_device *dev,
struct tu_shader **shader_out,
nir_shader *nir,
const struct tu_shader_key *key,
const struct tu_shader_info *shader_info,
const struct ir3_shader_key *ir3_key,
const void *key_data,
size_t key_size,