radeonsi: Use max_se instead of num_se where appropriate

Scratch allocation needs to happen using max_se, otherwise there can be hangs.

Cc: mesa-stable
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29202>
(cherry picked from commit 18c736bcfc)
2026-01-08 23:40:12 +01:00 · 2024-05-15 13:37:12 +02:00 · 2024-05-15 13:37:12 +02:00 · 2edbc666ba
commit 2edbc666ba
parent add46b79ea
3 changed files with 4 additions and 4 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -344,7 +344,7 @@
        "description": "radeonsi: Use max_se instead of num_se where appropriate",
        "nominated": true,
        "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": null,
        "notes": null
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -1001,7 +1001,7 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info,

   unsigned max_scratch_waves = info->max_scratch_waves;
   if (info->gfx_level >= GFX11)
-      max_scratch_waves /= info->num_se; /* WAVES is per SE */
+      max_scratch_waves /= info->max_se; /* WAVES is per SE */

   /* TODO: We could decrease WAVES to make the whole buffer fit into the infinity cache. */
   *tmpring_size = S_0286E8_WAVES(max_scratch_waves) |
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -6289,7 +6289,7 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx)

   for (unsigned i = 0; i < 4; ++i)
      si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 + i * 4,
-                     i < sscreen->info.num_se ? compute_cu_en : 0x0);
+                     i < sscreen->info.max_se ? compute_cu_en : 0x0);

   si_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0);
   si_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0);
@@ -6299,7 +6299,7 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx)
   if (sctx->gfx_level >= GFX11) {
      for (unsigned i = 4; i < 8; ++i)
         si_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4 + (i - 4) * 4,
-                        i < sscreen->info.num_se ? compute_cu_en : 0x0);
+                        i < sscreen->info.max_se ? compute_cu_en : 0x0);

      /* How many threads should go to 1 SE before moving onto the next. Think of GL1 cache hits.
       * Only these values are valid: 0 (disabled), 64, 128, 256, 512