ac/gpu_info: add #define AMD_MEMCHANNEL_INTERLEAVE_BYTES

radeon_info::pipe_interleave_bytes is renamed to r600_pipe_interleave_bytes where it can be 512 on some chips. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39120>
2026-05-08 06:58:05 +02:00 · 2026-01-05 16:07:47 -05:00 · 2026-01-05 16:07:47 -05:00 · 13cfd0176c
commit 13cfd0176c
parent 92133bb0ab
10 changed files with 32 additions and 32 deletions
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@ -946,11 +946,13 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
         info->gb_addr_config = 0;

      info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(info->gb_addr_config);
-      info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
+      assert((256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config)) ==
+             AMD_MEMCHANNEL_INTERLEAVE_BYTES);
   } else {
      unsigned pipe_config = G_009910_PIPE_CONFIG(amdinfo.gb_tile_mode[CIK_TILE_MODE_COLOR_2D]);
      info->num_tile_pipes = ac_pipe_config_to_num_pipes(pipe_config);
-      info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config);
+      assert((256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config)) ==
+             AMD_MEMCHANNEL_INTERLEAVE_BYTES);
   }
   info->r600_has_virtual_memory = true;

@ -1992,7 +1994,6 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd)
   fprintf(f, "    pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override);
   fprintf(f, "    max_render_backends = %i\n", info->max_render_backends);
   fprintf(f, "    num_tile_pipes = %i\n", info->num_tile_pipes);
-   fprintf(f, "    pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes);
   fprintf(f, "    enabled_rb_mask = 0x%" PRIx64 "\n", info->enabled_rb_mask);
   fprintf(f, "    max_alignment = %u\n", (unsigned)info->max_alignment);
   fprintf(f, "    pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count);
@ -2390,7 +2391,7 @@ void ac_get_task_info(const struct radeon_info *info,
    *    64K        | 550               | 574               | +4.3%
    *    # Adding 256 mitigates the performance loss from increasing num_entries.
    */
-   const uint32_t payload_entry_size = 16384 + info->pipe_interleave_bytes;
+   const uint32_t payload_entry_size = 16384 + AMD_MEMCHANNEL_INTERLEAVE_BYTES;
   const uint16_t num_entries = get_task_num_entries(info->family);
   const uint32_t draw_ring_bytes = num_entries * AC_TASK_DRAW_ENTRY_BYTES;
   const uint32_t payload_ring_bytes = num_entries * payload_entry_size;
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@ -82,10 +82,10 @@ extern "C" {
 *   - Vertex 0 of each TCS output starts on an address aligned to 256 to make TCS output stores
 *     from each subgroup always store 256B-aligned blocks of 256*N bytes.
 *
- * Number 256 comes from GB_ADDR_CONFIG.PIPE_INTERLEAVE_SIZE and is stored in
- * radeon_info::pipe_interleave_bytes. It's always 256 on all GCN and RDNA chips. "Pipe" means
- * a memory channel in this context.
+ * Number 256 comes from GB_ADDR_CONFIG.PIPE_INTERLEAVE_SIZE. It's always 256 on all GCN and RDNA
+ * chips. "Pipe" means a memory channel in this context.
 */
+#define AMD_MEMCHANNEL_INTERLEAVE_BYTES 256 /* always equal to GB_ADDR_CONFIG.PIPE_INTERLEAVE_SIZE */

 struct amdgpu_gpu_info;
 struct drm_amdgpu_info_device;
@ -417,12 +417,12 @@ struct radeon_info {
   uint32_t r600_gb_backend_map; /* R600 harvest config */
   bool r600_gb_backend_map_valid;
   uint32_t r600_num_banks;
+   uint32_t r600_pipe_interleave_bytes;
   uint32_t mc_arb_ramcfg;
   uint32_t gb_addr_config;
   uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
   uint32_t max_render_backends;  /* number of render backends incl. disabled ones */
   uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
-   uint32_t pipe_interleave_bytes;
   uint64_t enabled_rb_mask; /* bitmask of enabled physical RBs, up to max_render_backends bits */
   uint64_t max_alignment;   /* from addrlib */
   uint32_t pbb_max_alloc_count;
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@ -908,8 +908,7 @@ unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims
   return CLAMP(workgroup_size, 1, 256);
 }

-static unsigned get_tcs_wg_output_mem_size(const struct radeon_info *info,
-                                           uint32_t num_tcs_output_cp, uint32_t num_mem_tcs_outputs,
+static unsigned get_tcs_wg_output_mem_size(uint32_t num_tcs_output_cp, uint32_t num_mem_tcs_outputs,
                                           uint32_t num_mem_tcs_patch_outputs, uint32_t num_patches)
 {
   /* Align each per-vertex and per-patch output to 16 vec4 elements = 256B. It's most optimal when
@ -920,8 +919,8 @@ static unsigned get_tcs_wg_output_mem_size(const struct radeon_info *info,
    * cover 5 channels (128B .. 1.125K) instead of 4, which could increase VMEM latency.
    */
   unsigned mem_one_pervertex_output = align(16 * num_tcs_output_cp * num_patches,
-                                             info->pipe_interleave_bytes);
-   unsigned mem_one_perpatch_output = align(16 * num_patches, info->pipe_interleave_bytes);
+                                             AMD_MEMCHANNEL_INTERLEAVE_BYTES);
+   unsigned mem_one_perpatch_output = align(16 * num_patches, AMD_MEMCHANNEL_INTERLEAVE_BYTES);

   return mem_one_pervertex_output * num_mem_tcs_outputs +
          mem_one_perpatch_output * num_mem_tcs_patch_outputs;
@ -957,20 +956,20 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
      num_patches = MIN2(num_patches, 16); /* recommended */

   /* Make sure the output data fits in the offchip buffer */
-   unsigned mem_size = get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
+   unsigned mem_size = get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
                                                  num_mem_tcs_patch_outputs, num_patches);
   if (mem_size > info->hs_offchip_workgroup_dw_size * 4) {
      /* Find the number of patches that fit in memory. Each output is aligned separately,
       * so this division won't return a precise result.
       */
      num_patches = info->hs_offchip_workgroup_dw_size * 4 /
-                    get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
+                    get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
                                               num_mem_tcs_patch_outputs, 1);
-      assert(get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
+      assert(get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
                                        num_mem_tcs_patch_outputs, num_patches) <=
             info->hs_offchip_workgroup_dw_size * 4);

-      while (get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
+      while (get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
                                        num_mem_tcs_patch_outputs, num_patches + 1) <=
             info->hs_offchip_workgroup_dw_size * 4)
         num_patches++;
@ -1046,7 +1045,7 @@ ac_compute_scratch_wavesize(const struct radeon_info *info, uint32_t bytes_per_w
    * scratch performance by more randomly distributing scratch waves among
    * memory channels.
    *
-    * On GFX11+, this is exactly "|= info->pipe_interleave_bytes".
+    * On GFX11+, this is exactly "|= AMD_MEMCHANNEL_INTERLEAVE_BYTES".
    */
   if (bytes_per_wave)
      bytes_per_wave |= info->scratch_wavesize_granularity;
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@ -1276,7 +1276,7 @@ static int gfx6_surface_settings(struct ac_addrlib *addrlib, const struct radeon
 static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
                             struct radeon_surf *surf)
 {
-   unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
+   unsigned pipe_interleave_bytes = AMD_MEMCHANNEL_INTERLEAVE_BYTES;
   unsigned num_pipes = info->num_tile_pipes;
   unsigned cl_width, cl_height;

--- a/src/amd/common/nir/ac_nir_meta_cs_clear_copy_buffer.c
+++ b/src/amd/common/nir/ac_nir_meta_cs_clear_copy_buffer.c
@ -568,14 +568,12 @@ ac_prepare_cs_clear_copy_buffer(const struct ac_cs_clear_copy_buffer_options *op
    * the beginning a 256B block and clear/copy whole 256B blocks. Clearing/copying a 256B block
    * partially for each wave is inefficient, which happens when dst_offset isn't aligned to 256.
    * Clearing/copying whole 256B blocks per wave isn't possible if dwords_per_thread isn't 2^n.
-    *
-    * pipe_interleave_bytes is 256.
    */
   unsigned start_thread =
-      dst_offset_bound % options->info->pipe_interleave_bytes &&
+      dst_offset_bound % AMD_MEMCHANNEL_INTERLEAVE_BYTES &&
      util_is_power_of_two_nonzero(dwords_per_thread) ?
-            DIV_ROUND_UP(options->info->pipe_interleave_bytes -
-                         dst_offset_bound % options->info->pipe_interleave_bytes,
+            DIV_ROUND_UP(AMD_MEMCHANNEL_INTERLEAVE_BYTES -
+                         dst_offset_bound % AMD_MEMCHANNEL_INTERLEAVE_BYTES,
                         dwords_per_thread * 4) : 0;
   out->shader_key.has_start_thread = start_thread != 0;

--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@ -1076,7 +1076,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx,
 	const struct util_format_description *desc;
 	unsigned block_size = util_format_get_blocksize(res->b.b.format);
 	unsigned pitch_alignment =
-		MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
+		MAX2(64, rctx->screen->b.info.r600_pipe_interleave_bytes / block_size);
 	unsigned pitch = align(res->b.b.width0, pitch_alignment);
 	int i;
 	unsigned width_elements;
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@ -1078,7 +1078,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 		printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
 		printf("num_render_backends = %i\n", rscreen->info.max_render_backends);
 		printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
-		printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
+		printf("pipe_interleave_bytes = %i\n", rscreen->info.r600_pipe_interleave_bytes);
 		printf("enabled_rb_mask = 0x%" PRIx64 "\n", rscreen->info.enabled_rb_mask);
 		printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
 	}
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@ -723,7 +723,7 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
 	unsigned element_bits = 4;
 	unsigned cmask_cache_bits = 1024;
 	unsigned num_pipes = rscreen->info.num_tile_pipes;
-	unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
+	unsigned pipe_interleave_bytes = rscreen->info.r600_pipe_interleave_bytes;

 	unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
 	unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
@ -843,7 +843,7 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
 	slice_elements = (width * height) / (8 * 8);
 	slice_bytes = slice_elements * 4;

-	pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
+	pipe_interleave_bytes = rscreen->info.r600_pipe_interleave_bytes;
 	base_align = num_pipes * pipe_interleave_bytes;

 	rtex->surface.meta_alignment_log2 = util_logbase2(base_align);
--- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
@ -204,7 +204,8 @@ static void si_compute_cmask(const struct radeon_info *info,
                             const struct ac_surf_config *config,
                             struct radeon_surf *surf)
 {
-   unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
+   unsigned pipe_interleave_bytes = info->gfx_level >= GFX6 ? AMD_MEMCHANNEL_INTERLEAVE_BYTES :
+                                                              info->r600_pipe_interleave_bytes;
   unsigned num_pipes = info->num_tile_pipes;
   unsigned cl_width, cl_height;

@ -315,7 +316,8 @@ static void si_compute_htile(const struct radeon_info *info,
   slice_elements = (width * height) / (8 * 8);
   slice_bytes = slice_elements * 4;

-   pipe_interleave_bytes = info->pipe_interleave_bytes;
+   pipe_interleave_bytes = info->gfx_level >= GFX6 ? AMD_MEMCHANNEL_INTERLEAVE_BYTES :
+                                                     info->r600_pipe_interleave_bytes;
   base_align = num_pipes * pipe_interleave_bytes;

   surf->meta_alignment_log2 = util_logbase2(base_align);
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@ -465,13 +465,13 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
               4 << ((tiling_config & 0xf0) >> 4) :
                    4 << ((tiling_config & 0x30) >> 4);

-      ws->info.pipe_interleave_bytes =
+      ws->info.r600_pipe_interleave_bytes =
            ws->info.gfx_level >= EVERGREEN ?
               256 << ((tiling_config & 0xf00) >> 8) :
                      256 << ((tiling_config & 0xc0) >> 6);

-      if (!ws->info.pipe_interleave_bytes)
-         ws->info.pipe_interleave_bytes =
+      if (!ws->info.r600_pipe_interleave_bytes)
+         ws->info.r600_pipe_interleave_bytes =
               ws->info.gfx_level >= EVERGREEN ? 512 : 256;

      radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,