From 4f628c9e8c952724cbe78a008a286e673e3b1d43 Mon Sep 17 00:00:00 2001
From: Caio Oliveira
Date: Wed, 22 Oct 2025 08:57:01 -0700
Subject: [PATCH] brw: Consolidate late lowering of int64 operations

Instead of doing it selectively and with different supporting passes,
just run the complete set (special algebraic before and cleanup
optimizations after) at the end of brw_postprocess_nir_opts().

No changes to fossil-db on ICL, TGL, ACM and BMG.

Reviewed-by: Kenneth Graunke
Reviewed-by: Ian Romanick
Part-of:
---
 src/intel/compiler/brw/brw_nir.c | 53 ++++++++++++++++----------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c
index b327dc3ded4..d8c6537eaf1 100644
--- a/src/intel/compiler/brw/brw_nir.c
+++ b/src/intel/compiler/brw/brw_nir.c
@@ -2164,6 +2164,21 @@ flag_fused_eu_disable_instr(nir_builder *b, nir_instr *instr, void *data)
    }
 }
 
+static void
+brw_nir_lower_int64(nir_shader *nir, const struct intel_device_info *devinfo)
+{
+   UNUSED bool progress; /* Written by OPT */
+
+   /* Potentially perform this optimization pass twice because it can create
+    * additional opportunities for itself.
+    */
+   if (OPT(nir_opt_algebraic_before_lower_int64))
+      OPT(nir_opt_algebraic_before_lower_int64);
+
+   if (OPT(nir_lower_int64))
+      brw_nir_optimize(nir, devinfo);
+}
+
 /* Prepare the given shader for codegen
  *
  * This function is intended to be called right before going into the actual
@@ -2264,14 +2279,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
     */
    OPT(intel_nir_lower_printf);
 
-   /* Potentially perform this optimization pass twice because it can create
-    * additional opportunities for itself.
-    */
-   if (OPT(nir_opt_algebraic_before_lower_int64))
-      OPT(nir_opt_algebraic_before_lower_int64);
-
-   if (OPT(nir_lower_int64))
-      brw_nir_optimize(nir, devinfo);
+   brw_nir_lower_int64(nir, devinfo);
 
    /* This pass specifically looks for sequences of fmul and fadd that
     * intel_nir_opt_peephole_ffma will try to eliminate. Call this
@@ -2331,11 +2339,7 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
       }
    } while (progress);
 
-   if (OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64)) {
-      if (OPT(nir_lower_int64)) {
-         brw_nir_optimize(nir, devinfo);
-      }
-   }
+   OPT(nir_lower_fp16_casts, nir_lower_fp16_split_fp64);
 
    OPT(nir_lower_alu_to_scalar, NULL, NULL);
 
@@ -2365,26 +2369,14 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
       .lower_subgroup_masks = true,
    };
 
-   if (OPT(nir_opt_uniform_atomics, false)) {
+   if (OPT(nir_opt_uniform_atomics, false))
       OPT(nir_lower_subgroups, &subgroups_options);
 
-      OPT(nir_opt_algebraic_before_lower_int64);
-
-      if (OPT(nir_lower_int64))
-         brw_nir_optimize(nir, devinfo);
-   }
-
    /* nir_opt_uniform_subgroup can create some operations (e.g.,
    * load_subgroup_lt_mask) that need to be lowered again.
    */
    if (OPT(nir_opt_uniform_subgroup, &subgroups_options)) {
-      /* Some of the optimizations can generate 64-bit integer multiplication
-       * that must be lowered.
-       */
-      OPT(nir_lower_int64);
-
-      /* Even if nir_lower_int64 did not make progress, re-run the main
-       * optimization loop. nir_opt_uniform_subgroup may have made some things
+      /* nir_opt_uniform_subgroup may have made some things
        * that previously appeared divergent be marked as convergent. This
       * allows the elimination of some loops over, say, a TXF instruction
       * with a non-uniform texture handle.
@@ -2394,6 +2386,13 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_lower_subgroups, &subgroups_options);
    }
 
+   /* A few passes that run after the initial int64 lowering may produce
+    * new int64 operations. E.g. uniform subgroup may generate a 64-bit mul
+    * and peephole_select may generate a 64-bit select. So do another
+    * round at the tail end.
+    */
+   brw_nir_lower_int64(nir, devinfo);
+
    /* Deal with EU fusion */
    if (devinfo->ver == 12) {
       nir_divergence_options options =
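
The new helper relies on the OPT()/progress convention used throughout
brw_nir.c: each pass call records whether it made progress into a local
bool progress, which is why brw_nir_lower_int64() declares
"UNUSED bool progress; /* Written by OPT */". Below is a minimal,
self-contained C sketch of that idiom. The shader_stub type, the stub
passes, and this simplified OPT() are hypothetical illustrations of the
pattern, not the actual Mesa definitions.

/* Sketch of the OPT()/progress idiom; everything here is a stand-in. */
#include <stdbool.h>
#include <stdio.h>

typedef struct { int changes; } shader_stub;

/* Stub passes: return true when they changed the "shader". */
static bool stub_algebraic_before_lower_int64(shader_stub *s)
{
   if (s->changes < 2) {   /* pretend it finds something the first two runs */
      s->changes++;
      return true;
   }
   return false;
}

static bool stub_lower_int64(shader_stub *s)
{
   (void)s;
   return false;           /* pretend there was nothing to lower */
}

/* Simplified OPT(): run a pass on `shader`, accumulate into `progress`,
 * and report whether this particular pass made progress.
 */
static bool run_opt(bool (*pass)(shader_stub *), shader_stub *shader,
                    bool *progress)
{
   bool this_progress = pass(shader);
   *progress |= this_progress;
   return this_progress;
}
#define OPT(pass) run_opt(pass, &shader, &progress)

int main(void)
{
   shader_stub shader = { 0 };
   bool progress = false;  /* written through OPT, as in the helper */

   /* Same shape as brw_nir_lower_int64(): re-run the algebraic pass once
    * if the first run made progress, then clean up only if the int64
    * lowering actually changed something.
    */
   if (OPT(stub_algebraic_before_lower_int64))
      OPT(stub_algebraic_before_lower_int64);

   if (OPT(stub_lower_int64))
      printf("would run brw_nir_optimize() here\n");

   printf("any progress: %s\n", progress ? "yes" : "no");
   return 0;
}

The if (OPT(pass)) OPT(pass); shape mirrors the helper added by the
patch: the algebraic pass is re-run only when its first run made
progress, since it can create additional opportunities for itself, and
the heavier cleanup (brw_nir_optimize()) is triggered only when
nir_lower_int64 changed the shader.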