radeonsi/gfx12: fix compute register settings for global_atomic_ordered_add

This is for future documentation/reference. It's likely that radeonsi won't
use the atomic in compute shaders.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30063>
2026-03-07 01:10:39 +01:00 · 2024-07-03 16:09:08 -04:00 · 2024-07-03 16:09:08 -04:00 · 641ec0ae6e
commit 641ec0ae6e
parent acb3d5f132
3 changed files with 13 additions and 2 deletions
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -936,7 +936,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
                                  * allow launching waves out-of-order. (same as Vulkan)
                                  * Not available in gfx940.
                                  */
-                                 S_00B800_ORDER_MODE(sctx->gfx_level >= GFX7 &&
+                                 S_00B800_ORDER_MODE(!sctx->cs_shader_state.program->sel.info.uses_atomic_ordered_add &&
+                                                     sctx->gfx_level >= GFX7 &&
                                                     (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) |
                                 S_00B800_CS_W32_EN(sctx->cs_shader_state.program->shader.wave_size == 32);

@@ -972,7 +973,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
      /* Set PING_PONG_EN for every other dispatch.
       * Only allowed on a gfx queue, and PARTIAL_TG_EN and USE_THREAD_DIMENSIONS must be 0.
       */
-      if (sctx->has_graphics && !partial_block_en) {
+      if (sctx->has_graphics && !partial_block_en &&
+          !sctx->cs_shader_state.program->sel.info.uses_atomic_ordered_add) {
         dispatch_initiator |= S_00B800_PING_PONG_EN(sctx->compute_ping_pong_launch);
         sctx->compute_ping_pong_launch ^= 1;
      }
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -497,6 +497,7 @@ struct si_shader_info {
   bool uses_variable_block_size;
   bool uses_grid_size;
   bool uses_tg_size;
+   bool uses_atomic_ordered_add;
   bool writes_position;
   bool writes_psize;
   bool writes_clipvertex;
--- a/src/gallium/drivers/radeonsi/si_shader_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_info.c
@@ -501,6 +501,11 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
          !nir_src_is_const(intr->src[0]))
         info->uses_indirect_descriptor = true;

+      if (nir_intrinsic_has_atomic_op(intr)) {
+         if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_ordered_add_gfx12_amd)
+            info->uses_atomic_ordered_add = true;
+      }
+
      switch (intr->intrinsic) {
      case nir_intrinsic_store_ssbo:
         if (!nir_src_is_const(intr->src[1]))
@@ -609,6 +614,9 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
      case nir_intrinsic_interp_deref_at_offset:
         unreachable("these opcodes should have been lowered");
         break;
+      case nir_intrinsic_ordered_add_loop_gfx12_amd:
+         info->uses_atomic_ordered_add = true;
+         break;
      default:
         break;
      }