From cbeccb0a09d4c705a723c83c5e2ab02e6dda9cdc Mon Sep 17 00:00:00 2001
From: Georg Lehmann <dadschoorse@gmail.com>
Date: Mon, 13 Apr 2026 14:48:51 +0200
Subject: [PATCH] radv: call radv_optimize_nir after lowering io
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are moving more and more passes after lowering io
because they no longer operate on variables.

There doesn't really seem to be a need to optimize before
lowering io, so do it afterwards.

Foz-DB Navi48:
Totals from 2339 (1.14% of 205045) affected shaders:
MaxWaves: 66218 -> 66258 (+0.06%)
Instrs: 2009510 -> 2007711 (-0.09%); split: -0.15%, +0.06%
CodeSize: 10646476 -> 10648376 (+0.02%); split: -0.05%, +0.07%
VGPRs: 131304 -> 131232 (-0.05%)
Latency: 19249976 -> 19248715 (-0.01%); split: -0.02%, +0.02%
InvThroughput: 3133252 -> 3132291 (-0.03%); split: -0.05%, +0.02%
VClause: 32999 -> 33003 (+0.01%); split: -0.07%, +0.08%
SClause: 42959 -> 43101 (+0.33%); split: -0.27%, +0.60%
Copies: 143721 -> 143792 (+0.05%); split: -0.38%, +0.43%
Branches: 38736 -> 38738 (+0.01%)
PreSGPRs: 106104 -> 105846 (-0.24%); split: -0.27%, +0.03%
PreVGPRs: 95217 -> 95179 (-0.04%); split: -0.04%, +0.00%
VALU: 1146620 -> 1144783 (-0.16%); split: -0.22%, +0.06%
SALU: 275263 -> 275183 (-0.03%); split: -0.16%, +0.13%
VMEM: 58353 -> 58364 (+0.02%)
SMEM: 84810 -> 85215 (+0.48%); split: -0.00%, +0.48%

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40928>
---
 src/amd/vulkan/nir/radv_nir_lower_io.c  |  5 ---
 src/amd/vulkan/radv_pipeline_graphics.c | 49 ++++++++++++-------------
 2 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c
index b44af9aa77f..0f42ca1087b 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_io.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_io.c
@@ -169,11 +169,6 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       /* Lower explicit input load intrinsics to sysvals for the layer ID. */
       NIR_PASS(_, nir, nir_lower_system_values);
-
-      /* Recompute FS input intrinsic bases to assign a location to each FS input.
-       * The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
-       */
-      radv_recompute_fs_input_bases(nir);
    }
 
    NIR_PASS(_, nir, nir_opt_dce);
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index e076b9e4179..edffc081196 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -2806,38 +2806,34 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
    radv_foreach_stage (i, active_nir_stages) {
       int64_t stage_start = os_time_get_nano();
 
-      radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
-
       radv_nir_lower_io(device, stages[i].nir);
 
       stages[i].feedback.duration += os_time_get_nano() - stage_start;
    }
 
    if (stages[MESA_SHADER_FRAGMENT].nir) {
-      bool update_info = false;
       if (gfx_state->dynamic_line_rast_mode)
-         NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth,
-                  RADV_NUM_SMOOTH_AA_SAMPLES);
+         NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth, RADV_NUM_SMOOTH_AA_SAMPLES);
 
       if (!gfx_state->ps.has_epilog) {
-         NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
+         NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
 
          /* Lower FS outputs to scalar to allow dce. */
          NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
 
-         NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
+         NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
 
-         NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
-         NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
-         NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf);
+         NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
+         NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
+         NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf);
       }
 
-      NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment);
+      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment);
 
-      NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
-      if (update_info)
-         nir_shader_gather_info(stages[MESA_SHADER_FRAGMENT].nir,
-                                nir_shader_get_entrypoint(stages[MESA_SHADER_FRAGMENT].nir));
+      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_cse);
+      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
+      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
+      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
    }
 
    radv_foreach_stage (i, active_nir_stages) {
@@ -2864,20 +2860,23 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
       if (!remove_as_varying && !remove_as_sysval)
          continue;
 
-      bool progress = false;
+      NIR_PASS(_, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval);
+      break;
+   }
 
-      NIR_PASS(progress, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval);
+   radv_foreach_stage (i, active_nir_stages) {
+      int64_t stage_start = os_time_get_nano();
 
-      if (progress) {
-         /* Remove dead code resulting from removed outputs. */
-         do {
-            progress = false;
-            NIR_PASS(progress, stages[i].nir, nir_opt_dce);
-            NIR_PASS(progress, stages[i].nir, nir_opt_dead_cf);
-         } while (progress);
+      radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
+
+      if (i == MESA_SHADER_FRAGMENT) {
+         /* Recompute FS input intrinsic bases to assign a location to each FS input.
+          * The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
+          */
+         radv_recompute_fs_input_bases(stages[i].nir);
       }
 
-      break;
+      stages[i].feedback.duration += os_time_get_nano() - stage_start;
    }
 
    /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */