nir: Add an optimization pass to reduce barrier modes

Many shaders issue full memory barriers, which may need to synchronize
access to images, SSBOs, shared local memory, or global memory.
However, many such shaders only use a subset of those memory types -
say, only SSBOs.

Shaders may also have patterns such as:

   1. shared local memory access
   2. barrier with full variable modes
   3. more shared local memory access
   4. image access

In this case, the barrier is needed to ensure synchronization between
the various shared local memory operations.  The shader does access
images as well, but all of those accesses fall on one side of the
barrier, so the barrier is a no-op for image access.  We can drop the
image mode from this barrier too.
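
To make the intended result concrete, here is a sketch in terms of the
NIR mode flags the pass operates on; the "before"/"after" variables are
illustrative names for this write-up, not part of the commit:

   /* Modes on the barrier in step 2, before the pass runs: */
   unsigned before = nir_var_image | nir_var_mem_ssbo |
                     nir_var_mem_shared | nir_var_mem_global;

   /* Only shared local memory is accessed on both sides of the
    * barrier, so the pass reduces the barrier's memory modes to:
    */
   unsigned after = nir_var_mem_shared;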

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24842>
Author: Kenneth Graunke
Date: 2023-08-21 18:53:20 -07:00 (committed by Marge Bot)
parent 1c3706fc28
commit 7dd897e1cd
2 changed files with 136 additions and 0 deletions

src/compiler/nir/nir.h

@@ -5962,6 +5962,7 @@ typedef bool (*nir_combine_barrier_cb)(
bool nir_opt_combine_barriers(nir_shader *shader,
                              nir_combine_barrier_cb combine_cb,
                              void *data);
bool nir_opt_barrier_modes(nir_shader *shader);
bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);

src/compiler/nir/nir_opt_barriers.c

@@ -22,6 +22,8 @@
 */

#include "nir.h"
#include "nir_worklist.h"
#include "util/u_vector.h"

static bool
combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *_)
@@ -99,3 +101,136 @@ nir_opt_combine_barriers(nir_shader *shader,
   return progress;
}

static bool
barrier_happens_before(const nir_instr *a, const nir_instr *b)
{
   /* Within a block, compare the per-instruction indices provided by
    * nir_metadata_instr_index; across blocks, fall back to dominance.
    */
   if (a->block == b->block)
      return a->index < b->index;

   return nir_block_dominates(a->block, b->block);
}

static bool
nir_opt_barrier_modes_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_instr_worklist *barriers = nir_instr_worklist_create();
   if (!barriers)
      return false;

   struct u_vector mem_derefs;
   if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) {
      nir_instr_worklist_destroy(barriers);
      return false;
   }

   const unsigned all_memory_modes = nir_var_image |
                                     nir_var_mem_ssbo |
                                     nir_var_mem_shared |
                                     nir_var_mem_global;

   /* Gather all barriers and all derefs of barrier-relevant memory in a
    * single walk over the shader.
    */
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_barrier)
               nir_instr_worklist_push_tail(barriers, instr);
         } else if (instr->type == nir_instr_type_deref) {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (nir_deref_mode_may_be(deref, all_memory_modes) ||
                glsl_contains_atomic(deref->type)) {
               nir_deref_instr **tail = u_vector_add(&mem_derefs);
               *tail = deref;
            }
         }
      }
   }

   nir_foreach_instr_in_worklist(instr, barriers) {
      nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);
      const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier);

      /* Start from the non-memory modes, which are always kept. */
      unsigned new_modes = barrier_modes & ~all_memory_modes;

      /* If a barrier dominates all memory accesses for a particular mode (or
       * there are none), then the barrier cannot affect those accesses.  We
       * can drop that mode from the barrier.
       *
       * For each barrier, we look at the list of memory derefs, and see if
       * the barrier fails to dominate the deref.  If so, then there's at
       * least one memory access that may happen before the barrier, so we
       * need to keep the mode.  Any modes not kept are discarded.
       */
      nir_deref_instr **p_deref;
      u_vector_foreach(p_deref, &mem_derefs) {
         nir_deref_instr *deref = *p_deref;

         /* Atomic counters behave like SSBO accesses for barrier purposes,
          * so count derefs of atomic types as SSBO accesses as well.
          */
         const unsigned atomic_mode =
            glsl_contains_atomic(deref->type) ? nir_var_mem_ssbo : 0;
         const unsigned deref_modes =
            (deref->modes | atomic_mode) & barrier_modes;

         if (deref_modes &&
             !barrier_happens_before(&barrier->instr, &deref->instr))
            new_modes |= deref_modes;
      }

      /* If we don't need all the modes, update the barrier. */
      if (barrier_modes != new_modes) {
         nir_intrinsic_set_memory_modes(barrier, new_modes);
         progress = true;
      }
   }

   nir_instr_worklist_destroy(barriers);
   u_vector_finish(&mem_derefs);

   return progress;
}
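
/* Worked example of the rule above (illustrative, not part of the pass):
 *
 *    deref A: shared local memory access
 *    barrier with memory modes = shared | image
 *    deref B: shared local memory access
 *    deref C: image access
 *
 * Deref A intersects the barrier's modes and is not dominated by the
 * barrier, so nir_var_mem_shared is kept.  Every image deref (C) is
 * dominated by the barrier, so nir_var_image is dropped.
 */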

/**
 * Reduce barriers to remove unnecessary modes.
 *
 * This pass must be called before nir_lower_explicit_io lowers derefs!
 *
 * Many shaders issue full memory barriers, which may need to synchronize
 * access to images, SSBOs, shared local memory, or global memory.  However,
 * many such shaders only use a subset of those memory types - say, only
 * SSBOs.
 *
 * Shaders may also have patterns such as:
 *
 *    1. shared local memory access
 *    2. barrier with full variable modes
 *    3. more shared local memory access
 *    4. image access
 *
 * In this case, the barrier is needed to ensure synchronization between the
 * various shared local memory operations.  The shader does access images as
 * well, but all of those accesses fall on one side of the barrier, so the
 * barrier is a no-op for image access.  We can drop the image mode from the
 * barrier in this case too.
 */
bool
nir_opt_barrier_modes(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      nir_metadata_require(impl, nir_metadata_dominance |
                                 nir_metadata_instr_index);

      if (nir_opt_barrier_modes_impl(impl)) {
         nir_metadata_preserve(impl, nir_metadata_block_index |
                                     nir_metadata_dominance |
                                     nir_metadata_live_defs);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}
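
For context, a driver would invoke the new pass like any other NIR
optimization, while derefs are still present.  The snippet below is a
hypothetical sketch, not part of this commit; the optimize_shader
wrapper and its position in a compile pipeline are assumptions:

   #include "nir.h"

   /* Hypothetical example: run nir_opt_barrier_modes before
    * nir_lower_explicit_io, since the pass walks deref instructions.
    */
   static bool
   optimize_shader(nir_shader *nir)
   {
      bool progress = false;

      /* NIR_PASS accumulates the pass's return value into 'progress'. */
      NIR_PASS(progress, nir, nir_opt_barrier_modes);

      return progress;
   }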