ac: move has_cs_regalloc_hang_bug to ac_compiler_info

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41022>
2026-05-06 18:08:40 +02:00 · 2026-04-24 13:31:20 +01:00 · 2026-04-24 13:31:20 +01:00 · 5ee0935861
commit 5ee0935861
parent e40457b136
7 changed files with 17 additions and 17 deletions
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@ -401,6 +401,16 @@ ac_fill_compiler_info(struct radeon_info *info, const struct drm_amdgpu_info_dev
   out->has_attr_ring_wait_bug = info->gfx_level >= GFX11 && info->gfx_level < GFX12;

   out->has_primid_instancing_bug = info->gfx_level == GFX6 && info->max_se == 1;
+
+   /* HW bug workaround when CS threadgroups > 256 threads and async compute
+    * isn't used, i.e. only one compute job can run at a time.  If async
+    * compute is possible, the threadgroup size must be limited to 256 threads
+    * on all queues to avoid the bug.
+    * Only GFX6 and certain GFX7 chips are affected.
+    */
+   out->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 ||
+                                   info->family == CHIP_BONAIRE ||
+                                   info->family == CHIP_KABINI;
 }

 void
@ -948,16 +958,6 @@ void ac_fill_bug_info(struct radeon_info *info)
    */
   info->has_vrs_export_bug = info->gfx_level == GFX12;

-   /* HW bug workaround when CS threadgroups > 256 threads and async compute
-    * isn't used, i.e. only one compute job can run at a time.  If async
-    * compute is possible, the threadgroup size must be limited to 256 threads
-    * on all queues to avoid the bug.
-    * Only GFX6 and certain GFX7 chips are affected.
-    */
-   info->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 ||
-                                    info->family == CHIP_BONAIRE ||
-                                    info->family == CHIP_KABINI;
-
   /* HW bug workaround with async compute dispatches when threadgroup > 4096.
    * The workaround is to change the "threadgroup" dimension mode to "thread"
    * dimension mode.
@ -2078,6 +2078,7 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd)
   fprintf(f, "    has_ngg_fully_culled_bug = %i\n", info->compiler_info.has_ngg_fully_culled_bug);
   fprintf(f, "    has_attr_ring_wait_bug = %i\n", info->compiler_info.has_attr_ring_wait_bug);
   fprintf(f, "    has_primid_instancing_bug = %i\n", info->compiler_info.has_primid_instancing_bug);
+   fprintf(f, "    has_cs_regalloc_hang_bug = %i\n", info->compiler_info.has_cs_regalloc_hang_bug);

   fprintf(f, "Ring info:\n");
   if (info->gfx_level >= GFX11) {
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@ -198,8 +198,10 @@ struct ac_compiler_info {
   uint32_t has_attr_ring_wait_bug : 1;
   /* GFX6: limit TCS workgroup to one patch if primitive ID is used. */
   uint32_t has_primid_instancing_bug : 1;
+   /* GFX6 and certain GFX7 chips: bug with compute workgroups larger 256 invocations. */
+   uint32_t has_cs_regalloc_hang_bug : 1;

-   uint32_t reserved : 5;
+   uint32_t reserved : 4;
 };

 struct radeon_info {
@ -269,7 +271,6 @@ struct radeon_info {
   bool has_two_planes_iterate256_bug;
   bool has_vgt_flush_ngg_legacy_bug;
   bool has_prim_restart_sync_bug;
-   bool has_cs_regalloc_hang_bug;
   bool has_async_compute_threadgroup_bug;
   bool has_async_compute_align32_bug;
   bool has_32bit_predication;
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@ -1146,7 +1146,6 @@ radv_device_init_compiler_info(struct radv_device *device)
            .family = pdev->info.family,
            .address32_hi = pdev->info.address32_hi,
            .rbplus_allowed = pdev->info.rbplus_allowed,
-            .has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug,
         },
      /* Debug/tracing */
      .debug =
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@ -860,7 +860,7 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct
   if (progress)
      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

-   if (compiler_info->hw.has_cs_regalloc_hang_bug && mesa_shader_stage_is_compute(nir->info.stage)) {
+   if (compiler_info->ac->has_cs_regalloc_hang_bug && mesa_shader_stage_is_compute(nir->info.stage)) {
      const uint32_t wg_size = nir->info.workgroup_size[0] *
                               nir->info.workgroup_size[1] *
                               nir->info.workgroup_size[2];
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@ -518,7 +518,6 @@ struct radv_compiler_info {
      uint32_t family;
      uint32_t address32_hi;
      bool rbplus_allowed;
-      bool has_cs_regalloc_hang_bug;
   } hw;

   /* Debug/tracing */
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@ -413,7 +413,7 @@ void si_init_compute_caps(struct si_screen *sscreen)
      caps->subgroup_sizes = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32;

   caps->max_variable_threads_per_block =
-      sscreen->info.has_cs_regalloc_hang_bug ? 256 : SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+      sscreen->info.compiler_info.has_cs_regalloc_hang_bug ? 256 : SI_MAX_VARIABLE_THREADS_PER_BLOCK;
 }

 static void si_init_mesh_caps(struct si_screen *sscreen)
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@ -695,7 +695,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
   }

   if (mesa_shader_stage_is_compute(nir->info.stage)) {
-      if (sel->screen->info.has_cs_regalloc_hang_bug) {
+      if (sel->screen->info.compiler_info.has_cs_regalloc_hang_bug) {
         const uint32_t wg_size = nir->info.workgroup_size[0] *
                                  nir->info.workgroup_size[1] *
                                  nir->info.workgroup_size[2];