panfrost: Fix AFBC packing

Layout refactoring commits broke AFBC packing while removing several fields to simplify the logic. The stride and height are now derived when necessary at packing time based on the resource modifier. The problem is that the code assumes that the source and destination headers are the same although the source and destination modifiers might differ and create size mismatches when passed to the AFBC utilities in pan_afbc.h. The destination modifier is set as the source modifier without the AFBC_FORMAT_MOD_SPARSE and AFBC_FORMAT_MOD_TILED flags. While the AFBC_FORMAT_MOD_SPARSE flag doesn't have any impact on these utilities, the AFBC_FORMAT_MOD_TILED flag does. This commit fixes the issue by keeping the same header block layout (linear or tiled header layout) when packing a resource. This allows to simply parse header blocks linearly without having to bother with the internal layout (Morton order). The tiled packed resource might also benefit from better cache accesses. Fixes: a2e9ce39e9 pan/layout: Drop pan_image_slice_layout::afbc::{stride_sb,nr_sblocks} Fixes: 01d325ba63 pan/layout: Interleave header/body in AFBC(3D) Signed-off-by: Loïc Molinari <loic.molinari@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Acked-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35001>
2026-05-09 08:58:02 +02:00 · 2025-07-01 15:37:25 +02:00 · 2025-07-01 15:37:25 +02:00 · 98c8c95755
commit 98c8c95755
parent 34b693914a
2 changed files with 25 additions and 71 deletions
--- a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c
+++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c
@ -75,27 +75,6 @@ write_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx, nir_def *hdr)
   nir_store_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16, hdr, 0xF);
 }

-static nir_def *
-get_morton_index(nir_builder *b, nir_def *idx, nir_def *src_stride,
-                 nir_def *dst_stride)
-{
-   nir_def *x = nir_umod(b, idx, dst_stride);
-   nir_def *y = nir_udiv(b, idx, dst_stride);
-
-   nir_def *offset = nir_imul(b, nir_iand_imm(b, y, ~0x7), src_stride);
-   offset = nir_iadd(b, offset, nir_ishl_imm(b, nir_ushr_imm(b, x, 3), 6));
-
-   x = nir_iand_imm(b, x, 0x7);
-   x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 2)), 0x13);
-   x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 1)), 0x15);
-   y = nir_iand_imm(b, y, 0x7);
-   y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 2)), 0x13);
-   y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 1)), 0x15);
-   nir_def *tile_idx = nir_ior(b, x, nir_ishl_imm(b, y, 1));
-
-   return nir_iadd(b, offset, tile_idx);
-}
-
 static nir_def *
 get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr,
                    nir_def *uncompressed_size)
@ -167,16 +146,15 @@ get_packed_offset(nir_builder *b, nir_def *metadata, nir_def *idx,
 #define MAX_LINE_SIZE 16

 static void
-copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
-                nir_def *src, nir_def *src_idx, nir_def *metadata,
-                nir_def *meta_idx, unsigned align)
+copy_superblock(nir_builder *b, nir_def *dst, nir_def *hdr_sz, nir_def *src,
+                nir_def *metadata, nir_def *idx, unsigned align)
 {
-   nir_def *hdr = read_afbc_header(b, src, src_idx);
+   nir_def *hdr = read_afbc_header(b, src, idx);
   nir_def *src_body_base_ptr = nir_u2u64(b, nir_channel(b, hdr, 0));
   nir_def *src_bodyptr = nir_iadd(b, src, src_body_base_ptr);

   nir_def *size;
-   nir_def *dst_offset = get_packed_offset(b, metadata, meta_idx, &size);
+   nir_def *dst_offset = get_packed_offset(b, metadata, idx, &size);
   nir_def *dst_body_base_ptr = nir_iadd(b, dst_offset, hdr_sz);
   nir_def *dst_bodyptr = nir_iadd(b, dst, dst_body_base_ptr);

@ -184,7 +162,7 @@ copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
   nir_def *hdr2 =
      nir_vector_insert_imm(b, hdr, nir_u2u32(b, dst_body_base_ptr), 0);
   hdr = nir_bcsel(b, nir_ieq_imm(b, src_body_base_ptr, 0), hdr, hdr2);
-   write_afbc_header(b, dst, dst_idx, hdr);
+   write_afbc_header(b, dst, idx, hdr);

   nir_variable *offset_var =
      nir_local_variable_create(b->impl, glsl_uint_type(), "offset");
@ -256,7 +234,6 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
                                 const struct pan_mod_convert_shader_key *key)
 {
   unsigned align = key->afbc.align;
-   bool tiled = key->mod & AFBC_FORMAT_MOD_TILED;
   struct panfrost_device *dev = pan_device(&screen->base);
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, pan_shader_get_compiler_options(dev->arch),
@ -265,19 +242,14 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
   panfrost_afbc_add_info_ubo(pack, b);

   nir_def *coord = nir_load_global_invocation_id(&b, 32);
-   nir_def *src_stride = panfrost_afbc_pack_get_info_field(&b, src_stride);
-   nir_def *dst_stride = panfrost_afbc_pack_get_info_field(&b, dst_stride);
-   nir_def *dst_idx = nir_channel(&b, coord, 0);
-   nir_def *src_idx =
-      tiled ? get_morton_index(&b, dst_idx, src_stride, dst_stride) : dst_idx;
+   nir_def *idx = nir_channel(&b, coord, 0);
   nir_def *src = panfrost_afbc_pack_get_info_field(&b, src);
   nir_def *dst = panfrost_afbc_pack_get_info_field(&b, dst);
   nir_def *header_size =
      nir_u2u64(&b, panfrost_afbc_pack_get_info_field(&b, header_size));
   nir_def *metadata = panfrost_afbc_pack_get_info_field(&b, metadata);

-   copy_superblock(&b, dst, dst_idx, header_size, src, src_idx, metadata,
-                   src_idx, align);
+   copy_superblock(&b, dst, header_size, src, metadata, idx, align);

   return b.shader;
 }
@ -460,7 +432,7 @@ panfrost_get_afbc_pack_shaders(struct panfrost_context *ctx,
                               struct panfrost_resource *rsrc, unsigned align)
 {
   struct pan_mod_convert_shader_key key = {
-      .mod = DRM_FORMAT_MOD_ARM_AFBC(rsrc->modifier & AFBC_FORMAT_MOD_TILED),
+      .mod = DRM_FORMAT_MOD_ARM_AFBC(0),
      .afbc = {
         .bpp = util_format_get_blocksizebits(rsrc->base.format),
         .align = align,
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@ -1464,16 +1464,6 @@ pan_dump_resource(struct panfrost_context *ctx, struct panfrost_resource *rsc)

 #endif

-/* Get scan-order index from (x, y) position when blocks are
- * arranged in z-order in 8x8 tiles */
-static unsigned
-get_morton_index(unsigned x, unsigned y, unsigned stride)
-{
-   unsigned i = ((x << 0) & 1) | ((y << 1) & 2) | ((x << 1) & 4) |
-                ((y << 2) & 8) | ((x << 2) & 16) | ((y << 3) & 32);
-   return (((y & ~7) * stride) + ((x & ~7) << 3)) + i;
-}
-
 static void
 panfrost_store_tiled_images(struct panfrost_transfer *transfer,
                            struct panfrost_resource *rsrc)
@ -2020,10 +2010,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx,

   assert(prsrc->base.array_size == 1);

-   uint64_t src_modifier = prsrc->modifier;
-   uint64_t dst_modifier =
-      src_modifier & ~(AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE);
-   bool is_tiled = src_modifier & AFBC_FORMAT_MOD_TILED;
+   uint64_t modifier = prsrc->modifier & ~AFBC_FORMAT_MOD_SPARSE;
   unsigned last_level = prsrc->base.last_level;
   struct pan_image_slice_layout slice_infos[PIPE_MAX_TEXTURE_LEVELS] = {0};
   unsigned total_size = 0;
@ -2049,14 +2036,13 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
      struct pan_image_slice_layout *src_slice =
         &prsrc->plane.layout.slices[level];
      struct pan_image_slice_layout *dst_slice = &slice_infos[level];
-      unsigned src_stride = pan_afbc_stride_blocks(
-         src_modifier, src_slice->afbc.header.row_stride_B);
-      unsigned src_nr_sblocks =
-         src_stride *
+      unsigned nr_sblocks_total =
+         pan_afbc_stride_blocks(
+            modifier, src_slice->afbc.header.row_stride_B) *
         pan_afbc_height_blocks(
-            src_modifier, u_minify(prsrc->image.props.extent_px.height, level));
+            modifier, u_minify(prsrc->image.props.extent_px.height, level));
      uint32_t body_offset_B = pan_afbc_body_offset(
-         dev->arch, dst_modifier, src_slice->afbc.header.surface_size_B);
+         dev->arch, modifier, src_slice->afbc.header.surface_size_B);
      uint32_t offset = 0;
      struct pan_afbc_block_info *meta =
         metadata_bo->ptr.cpu + metadata_offsets[level];
@ -2069,28 +2055,24 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
       * at most 8 kB can be copied at each iteration (smaller values tend to
       * increase latency). */
      alignas(16) struct pan_afbc_block_info meta_chunk[64 * 16];
-      unsigned nr_blocks_per_chunk = ARRAY_SIZE(meta_chunk);
+      unsigned nr_sblocks_per_chunk = ARRAY_SIZE(meta_chunk);

-      for (unsigned i = 0; i < src_nr_sblocks; i += nr_blocks_per_chunk) {
-         unsigned nr_sblocks = MIN2(nr_blocks_per_chunk, src_nr_sblocks - i);
+      for (unsigned i = 0; i < nr_sblocks_total; i += nr_sblocks_per_chunk) {
+         unsigned nr_sblocks =
+            MIN2(nr_sblocks_per_chunk, nr_sblocks_total - i);

         util_streaming_load_memcpy(
            meta_chunk, &meta[i],
            nr_sblocks * sizeof(struct pan_afbc_block_info));

         for (unsigned j = 0; j < nr_sblocks; j++) {
-            unsigned idx = j;
-            if (is_tiled) {
-               idx &= ~63;
-               idx += get_morton_index(j & 7, (j & 63) >> 3, src_stride);
-            }
-            meta[i + idx].offset = offset;
-            offset += meta_chunk[idx].size;
+            meta[i + j].offset = offset;
+            offset += meta_chunk[j].size;
         }
      }

      total_size =
-         ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, dst_modifier));
+         ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, modifier));
      {
         /* Header layout is exactly the same, only the body is shrunk. */
         dst_slice->afbc.header = src_slice->afbc.header;
@ -2125,7 +2107,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
   }

   char *new_label =
-      panfrost_resource_new_label(prsrc, dst_modifier, old_user_label);
+      panfrost_resource_new_label(prsrc, modifier, old_user_label);

   struct panfrost_bo *dst =
      panfrost_bo_create(dev, new_size, 0, new_label);
@ -2156,9 +2138,9 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
      free(old_label);
   }

-   assert(!panfrost_is_emulated_mod(dst_modifier));
-   prsrc->image.props.modifier = dst_modifier;
-   prsrc->modifier = dst_modifier;
+   assert(!panfrost_is_emulated_mod(modifier));
+   prsrc->image.props.modifier = modifier;
+   prsrc->modifier = modifier;
   panfrost_bo_unreference(prsrc->bo);
   prsrc->bo = dst;
   prsrc->plane.base = dst->ptr.gpu;