brw: Do cmod prop again after scheduling

After selecting the scheduling mode, do cmod propagation (cmod prop) again.

It's possible that doing cmod prop after performing a schedule, but before
attempting register allocation, would cause a different scheduling mode to be
selected. However, this would require fully restoring the pre-schedule set of
instructions (via cloning). I have tried to implement this, and it's harder
than it looks. :(

v2: Delete unused variable `progress`. Noticed by Marge.

shader-db:

All Intel platforms had similar results. (Meteor Lake shown)
total instructions in shared programs: 19967018 -> 19967006 (<.01%)
instructions in affected programs: 10652 -> 10640 (-0.11%)
helped: 4 / HURT: 0

total cycles in shared programs: 884129990 -> 884139590 (<.01%)
cycles in affected programs: 20334512 -> 20344112 (0.05%)
helped: 0 / HURT: 4

fossil-db:

Lunar Lake
Totals:
Instrs: 924967191 -> 924963460 (-0.00%); split: -0.00%, +0.00%
Cycle count: 105962414958 -> 105961925594 (-0.00%); split: -0.00%, +0.00%
Spill count: 3423582 -> 3423564 (-0.00%); split: -0.00%, +0.00%
Fill count: 4877121 -> 4876955 (-0.00%); split: -0.00%, +0.00%

Totals from 2511 (0.12% of 2018786) affected shaders:
Instrs: 12541707 -> 12537976 (-0.03%); split: -0.03%, +0.00%
Cycle count: 4816359238 -> 4815869874 (-0.01%); split: -0.01%, +0.00%
Spill count: 179536 -> 179518 (-0.01%); split: -0.03%, +0.02%
Fill count: 279407 -> 279241 (-0.06%); split: -0.07%, +0.01%

Meteor Lake, DG2, Tiger Lake, Ice Lake, and Skylake had similar results. (Meteor Lake shown)
Totals:
Instrs: 980252404 -> 980237686 (-0.00%); split: -0.00%, +0.00%
Cycle count: 91758669556 -> 91764028404 (+0.01%); split: -0.00%, +0.01%
Spill count: 3664771 -> 3664744 (-0.00%); split: -0.00%, +0.00%
Fill count: 4962078 -> 4960482 (-0.03%); split: -0.04%, +0.01%

Totals from 8472 (0.38% of 2251522) affected shaders:
Instrs: 34977623 -> 34962905 (-0.04%); split: -0.04%, +0.00%
Cycle count: 6251857553 -> 6257216401 (+0.09%); split: -0.04%, +0.13%
Spill count: 480251 -> 480224 (-0.01%); split: -0.01%, +0.00%
Fill count: 676539 -> 674943 (-0.24%); split: -0.28%, +0.05%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38315>
2025-12-20 03:00:11 +01:00 · 2025-02-18 18:14:21 -08:00 · 2025-02-18 18:14:21 -08:00 · b967942b64
commit b967942b64
parent 09450faf6a
1 changed files with 18 additions and 12 deletions
--- a/src/intel/compiler/brw/brw_shader.cpp
+++ b/src/intel/compiler/brw/brw_shader.cpp
@@ -1251,6 +1251,19 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling)

   ralloc_free(scheduler_ctx);

+#define OPT(pass, ...) ({                                               \
+      pass_num++;                                                       \
+      bool this_progress = pass(s, ##__VA_ARGS__);                      \
+                                                                        \
+      if (this_progress)                                                \
+         s.debug_optimizer(nir, #pass, iteration, pass_num);            \
+                                                                        \
+      this_progress;                                                    \
+   })
+
+   int pass_num = 0;
+   int iteration = 95;
+
   if (!allocated) {
      if (0) {
         fprintf(stderr, "Spilling - using lowest-pressure mode \"%s\"\n",
@@ -1259,6 +1272,9 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling)
      restore_instruction_order(s, orders[best_press_idx]);
      s.shader_stats.scheduler_mode = scheduler_mode_name[pre_modes[best_press_idx]];

+      if (OPT(brw_opt_cmod_propagation))
+         OPT(brw_opt_dead_code_eliminate);
+
      allocated = brw_assign_regs(s, allow_spilling, spill_all);
   }

@@ -1280,24 +1296,14 @@ brw_allocate_registers(brw_shader &s, bool allow_spilling)
   if (s.failed)
      return;

-#define OPT(pass, ...) ({                                               \
-      pass_num++;                                                       \
-      bool this_progress = pass(s, ##__VA_ARGS__);                      \
-                                                                        \
-      if (this_progress)                                                \
-         s.debug_optimizer(nir, #pass, iteration, pass_num);            \
-                                                                        \
-      this_progress;                                                    \
-   })
-
 #define OPT_V(pass, ...) do {                                           \
      pass_num++;                                                       \
      pass(s, ##__VA_ARGS__);                                           \
      s.debug_optimizer(nir, #pass, iteration, pass_num);               \
   } while (false)

-   int pass_num = 0;
-   int iteration = 96;
+   pass_num = 0;
+   iteration++;

   s.debug_optimizer(nir, "post_ra_alloc", iteration, pass_num);