ac/nir/ngg: Add and use a has_attr_ring_wait_bug field to ac_gpu_info.

Apply the attribute ring wait workaround based on the new field instead of checking the GFX level directly in the NGG lowering code. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33218>
2026-05-06 05:08:08 +02:00 · 2025-01-27 12:09:37 +01:00 · 2025-01-27 12:09:37 +01:00 · b163ce51b1
commit b163ce51b1
parent e76361d626
4 changed files with 16 additions and 7 deletions
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@ -1302,6 +1302,17 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,

   info->has_export_conflict_bug = info->gfx_level == GFX11;

+   /* The hw starts culling after all exports are finished,
+    * not when all waves in an NGG workgroup are finished.
+    * If all primitives are culled, the hw deallocates the attribute ring
+    * for the NGG workgroup and reuses it for the next one while the previous NGG
+    * workgroup might still be issuing attribute stores.
+    * When there are 2 NGG workgroups in the system with the same attribute ring address,
+    * attributes may be corrupted.
+    * The workaround is to issue and wait for attribute stores before the last export.
+    */
+   info->has_attr_ring_wait_bug = info->gfx_level == GFX11 || info->gfx_level == GFX11_5;
+
   /* When LLVM is fixed to handle multiparts shaders, this value will depend
    * on the known good versions of LLVM. Until then, enable the equivalent WA
    * in the nir -> llvm backend.
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@ -116,6 +116,7 @@ struct radeon_info {
   bool never_send_perfcounter_stop;
   bool discardable_allows_big_page;
   bool has_export_conflict_bug;
+   bool has_attr_ring_wait_bug;
   bool has_vrs_ds_export_bug;
   bool has_taskmesh_indirect0_bug;
   bool sdma_supports_sparse;      /* Whether SDMA can safely access sparse resources. */
--- a/src/amd/common/nir/ac_nir_lower_ngg.c
+++ b/src/amd/common/nir/ac_nir_lower_ngg.c
@ -6,6 +6,7 @@

 #include "ac_nir.h"
 #include "ac_nir_helpers.h"
+#include "ac_gpu_info.h"
 #include "amdgfxregs.h"
 #include "nir_builder.h"
 #include "nir_xfb_info.h"
@ -1655,11 +1656,6 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
   }
 }

-static bool must_wait_attr_ring(enum amd_gfx_level gfx_level, bool has_param_exports)
-{
-   return (gfx_level == GFX11 || gfx_level == GFX11_5) && has_param_exports;
-}
-
 static void
 export_pos0_wait_attr_ring(nir_builder *b, nir_if *if_es_thread, nir_def *outputs[VARYING_SLOT_MAX][4], const ac_nir_lower_ngg_options *options)
 {
@ -1707,7 +1703,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
      options->can_cull ? nir_local_variable_create(impl, glsl_bool_type(), "gs_accepted") : NULL;
   nir_variable *gs_exported_var = nir_local_variable_create(impl, glsl_bool_type(), "gs_exported");

-   const bool wait_attr_ring = must_wait_attr_ring(options->gfx_level, options->has_param_exports);
+   const bool wait_attr_ring = options->has_param_exports && options->hw_info->has_attr_ring_wait_bug;
   bool streamout_enabled = shader->xfb_info && !options->disable_streamout;
   bool has_user_edgeflags =
      options->use_edgeflags && (shader->info.outputs_written & VARYING_BIT_EDGE);
@ -2408,7 +2404,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
   if (s->options->kill_layer)
      export_outputs &= ~VARYING_BIT_LAYER;

-   const bool wait_attr_ring = must_wait_attr_ring(s->options->gfx_level, s->options->has_param_exports);
+   const bool wait_attr_ring = s->options->has_param_exports && s->options->hw_info->has_attr_ring_wait_bug;
   if (wait_attr_ring)
      export_outputs &= ~VARYING_BIT_POS;

--- a/src/amd/vulkan/winsys/null/radv_null_winsys.c
+++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c
@ -117,6 +117,7 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_i

   gpu_info->has_3d_cube_border_color_mipmap = true;
   gpu_info->has_image_opcodes = true;
+   gpu_info->has_attr_ring_wait_bug = gpu_info->gfx_level == GFX11 || gpu_info->gfx_level == GFX11_5;

   if (gpu_info->family == CHIP_NAVI31 || gpu_info->family == CHIP_NAVI32)
      gpu_info->num_physical_wave64_vgprs_per_simd = 768;