nir: add options to lower only some image atomics to global

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33616>
2026-05-07 15:48:36 +02:00 · 2024-12-15 20:47:39 +02:00 · 2024-12-15 20:47:39 +02:00 · e22ab01dc7
commit e22ab01dc7
parent 656422df8a
5 changed files with 23 additions and 7 deletions
--- a/src/asahi/compiler/agx_nir_lower_texture.c
+++ b/src/asahi/compiler/agx_nir_lower_texture.c
@ -781,7 +781,7 @@ agx_nir_lower_texture(nir_shader *s)
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, fence_image,
            nir_metadata_control_flow, NULL);

-   NIR_PASS(progress, s, nir_lower_image_atomics_to_global);
+   NIR_PASS(progress, s, nir_lower_image_atomics_to_global, NULL, NULL);

   NIR_PASS(progress, s, nir_shader_intrinsics_pass, legalize_image_lod,
            nir_metadata_control_flow, NULL);
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -5606,7 +5606,9 @@ bool nir_lower_image(nir_shader *nir,
                     const nir_lower_image_options *options);

 bool
-nir_lower_image_atomics_to_global(nir_shader *s);
+nir_lower_image_atomics_to_global(nir_shader *s,
+                                  nir_intrin_filter_cb filter, /* may be NULL */
+                                  const void *data); /* handed to filter() */

 bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);

--- a/src/compiler/nir/nir_lower_image_atomics_to_global.c
+++ b/src/compiler/nir/nir_lower_image_atomics_to_global.c
@ -6,6 +6,11 @@
 #include "util/format/u_format.h"
 #include "nir_builder.h"

+struct lower_state {
+   nir_intrin_filter_cb filter; /* optional; NULL means no filtering */
+   const void *data; /* opaque argument passed back to filter() */
+};
+
 /*
 * If shader images are uncompressed, dedicated image atomics are unnecessary.
 * Instead, there may be a "load texel address" instruction that does all the
@ -15,8 +20,9 @@
 */

 static bool
-lower(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
+lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
+   const struct lower_state *state = data;
   nir_intrinsic_op address_op;
   bool swap;

@ -41,6 +47,9 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
   enum pipe_format format = nir_intrinsic_format(intr);
   unsigned bit_size = intr->def.bit_size;

+   if (state->filter && !state->filter(intr, state->data))
+      return false;
+
   /* Even for "formatless" access, we know the size of the texel accessed,
    * since it's the size of the atomic. We can use that to synthesize a
    * compatible format, which is good enough for texel address computations.
@ -95,9 +104,14 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
 }

 bool
-nir_lower_image_atomics_to_global(nir_shader *shader)
+nir_lower_image_atomics_to_global(nir_shader *shader,
+                                  nir_intrin_filter_cb filter,
+                                  const void *data)
 {
+   struct lower_state state = { /* bundles filter + data for lower() */
+      .filter = filter, .data = data,
+   };
   return nir_shader_intrinsics_pass(shader, lower,
                                     nir_metadata_control_flow,
-                                     NULL);
+                                     (void *) &state); /* lower() only reads it */
 }
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@ -5476,7 +5476,7 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
               .lower_index_to_offset = true,
            });

-   NIR_PASS(_, nir, nir_lower_image_atomics_to_global);
+   NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);

   /* on bifrost, lower MSAA load/stores to 3D load/stores */
   if (pan_arch(gpu_id) < 9)
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@ -462,7 +462,7 @@ midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id)
   };

   NIR_PASS(_, nir, nir_lower_tex, &lower_tex_options);
-   NIR_PASS(_, nir, nir_lower_image_atomics_to_global);
+   NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);

   /* TEX_GRAD fails to apply sampler descriptor settings on some
    * implementations, requiring a lowering.