From c63c6951494b598c66c4c718cb8d75ff73930cbb Mon Sep 17 00:00:00 2001
From: Rhys Perry <pendingchaos02@gmail.com>
Date: Fri, 29 Aug 2025 16:14:17 +0100
Subject: [PATCH] radv: move nir_opt_algebraic loop for NGG culling earlier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

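Split radv_optimize_nir_algebraic() into radv_optimize_nir_algebraic_early()
(the nir_opt_algebraic loop) and radv_optimize_nir_algebraic_late(). For
non-GS NGG shaders with culling enabled, the early loop now runs in
radv_postprocess_nir() before radv_nir_apply_pipeline_layout, and
radv_lower_ngg() only runs the late part.

Totals from 6913 (8.66% of 79825) affected shaders: (Navi21)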
Instrs: 5373319 -> 5358717 (-0.27%); split: -0.30%, +0.03%
CodeSize: 27448536 -> 27345464 (-0.38%); split: -0.41%, +0.03%
SpillSGPRs: 982 -> 998 (+1.63%)
Latency: 22998827 -> 23011602 (+0.06%); split: -0.13%, +0.19%
InvThroughput: 4663749 -> 4664809 (+0.02%); split: -0.00%, +0.03%
VClause: 120845 -> 120461 (-0.32%); split: -0.49%, +0.17%
SClause: 119068 -> 116064 (-2.52%); split: -2.71%, +0.18%
Copies: 456590 -> 456450 (-0.03%); split: -0.19%, +0.16%
Branches: 145555 -> 145559 (+0.00%); split: -0.00%, +0.01%
PreSGPRs: 300465 -> 301154 (+0.23%); split: -0.01%, +0.24%
VALU: 3064127 -> 3064210 (+0.00%); split: -0.00%, +0.00%
SALU: 891257 -> 886368 (-0.55%); split: -0.71%, +0.16%
SMEM: 190500 -> 184624 (-3.08%); split: -3.11%, +0.02%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36936>
---
 src/amd/vulkan/radv_pipeline.c | 11 +++++++--
 src/amd/vulkan/radv_shader.c   | 44 +++++++++++++++++++++-------------
 src/amd/vulkan/radv_shader.h   |  2 ++
 3 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 884eff95f8d..204848f6e1d 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -433,6 +433,15 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    if (constant_fold_for_push_const && stage->args.ac.inline_push_const_mask)
       NIR_PASS(_, stage->nir, nir_opt_constant_folding);
 
+   /* Optimize NIR before NGG culling */
+   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
+   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
+   if (lowered_ngg && stage->nir->info.stage != MESA_SHADER_GEOMETRY && stage->info.has_ngg_culling)
+      radv_optimize_nir_algebraic_early(stage->nir);
+
+   /* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before
+    * NGG culling.
+    */
    NIR_PASS(_, stage->nir, radv_nir_apply_pipeline_layout, device, stage);
 
    NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
@@ -466,9 +475,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    }
 
    /* Lower I/O intrinsics to memory instructions. */
-   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
    bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
-   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
    if (lowered_ngg) {
       radv_lower_ngg(device, stage, gfx_state);
    } else if (is_last_vgt_stage) {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index a702997fb26..4c7373c0266 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -234,7 +234,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
 }
 
 void
-radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
+radv_optimize_nir_algebraic_early(nir_shader *nir)
 {
    bool more_algebraic = true;
    while (more_algebraic) {
@@ -258,21 +258,11 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
       NIR_PASS(_, nir, nir_opt_remove_phis);
       NIR_PASS(_, nir, nir_opt_dead_cf);
    }
+}
 
-   if (opt_offsets) {
-      const nir_opt_offsets_options offset_options = {
-         .uniform_max = 0,
-         .buffer_max = ~0,
-         .shared_max = UINT16_MAX,
-         .shared_atomic_max = UINT16_MAX,
-         .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
-         .cb_data = &gfx_level,
-      };
-      NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
-   }
-   if (opt_mqsad)
-      NIR_PASS(_, nir, nir_opt_mqsad);
-
+void
+radv_optimize_nir_algebraic_late(nir_shader *nir)
+{
    /* Do late algebraic optimization to turn add(a,
     * neg(b)) back into subs, then the mandatory cleanup
     * after algebraic.  Note that it may produce fnegs,
@@ -292,6 +282,28 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
    _mesa_set_destroy(skip, NULL);
 }
 
+void
+radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
+{
+   radv_optimize_nir_algebraic_early(nir);
+
+   if (opt_offsets) {
+      const nir_opt_offsets_options offset_options = {
+         .uniform_max = 0,
+         .buffer_max = ~0,
+         .shared_max = UINT16_MAX,
+         .shared_atomic_max = UINT16_MAX,
+         .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
+         .cb_data = &gfx_level,
+      };
+      NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
+   }
+   if (opt_mqsad)
+      NIR_PASS(_, nir, nir_opt_mqsad);
+
+   radv_optimize_nir_algebraic_late(nir);
+}
+
 static void
 shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 {
@@ -913,7 +925,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
       assert(info->is_ngg);
 
       if (info->has_ngg_culling)
-         radv_optimize_nir_algebraic(nir, false, false, pdev->info.gfx_level);
+         radv_optimize_nir_algebraic_late(nir);
 
       options.num_vertices_per_primitive = num_vertices_per_prim;
       options.early_prim_export = info->has_ngg_early_prim_export;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index dacc6d9cc4d..c25a3101f04 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -487,6 +487,8 @@ struct radv_shader_dma_submission {
 struct radv_shader_stage;
 
 void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
+void radv_optimize_nir_algebraic_early(nir_shader *shader);
+void radv_optimize_nir_algebraic_late(nir_shader *shader);
 void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_mqsad,
                                  enum amd_gfx_level gfx_level);