isl: Add support for creating layered surfaces for video encode/decode

Adds support for creating layered surfaces with slices that are addressable to the media engine for video encoding and decoding. Co-authored-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35651>
2025-12-23 13:20:14 +01:00 · 2025-06-19 21:27:03 -07:00 · 2025-06-19 21:27:03 -07:00 · 73608eb8b7
commit 73608eb8b7
parent 3cb77cb144
3 changed files with 191 additions and 2 deletions
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@ -2402,9 +2402,23 @@ isl_calc_array_pitch_el_rows_gfx4_2d(

   switch (array_pitch_span) {
   case ISL_ARRAY_PITCH_SPAN_COMPACT:
-      pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
+      /* If we have a request for a particular array pitch, inflate the physical
+       * image size to accommodate that pitch.
+       */
+      if (info->array_pitch_B) {
+         assert(ISL_GFX_VER(dev) >= 8);
+         uint32_t tiled_aligned_row_pitch_B =
+            align((fmtl->bpb / 8) * phys_slice0_sa->w, tile_info->phys_extent_B.width);
+         assert(info->array_pitch_B % tiled_aligned_row_pitch_B == 0);
+         pitch_sa_rows = DIV_ROUND_UP(
+            info->array_pitch_B, tiled_aligned_row_pitch_B);
+         assert(pitch_sa_rows % image_align_sa->h == 0);
+      } else {
+         pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
+      }
      break;
   case ISL_ARRAY_PITCH_SPAN_FULL: {
+      assert(!info->array_pitch_B);
      /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
       * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
       * Surfaces >> Surface Arrays.
@ -3323,8 +3337,20 @@ isl_calc_base_alignment(const struct isl_device *dev,
       *
       *     "For Linear memory, this field specifies the stride in chunks of
       *     64 bytes (1 cache line)."
+       *
+       * From the ATSM PRM Vol 2d,
+       * MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress:
+       *
+       *     "Specifies the 64 byte aligned reference frame buffer addresses"
+       *
+       * From the ATSM PRM Vol 2a,
+       * HCP_PIPE_BUF_ADDR_STATE::ReferencePictureBaseAddress,
+       * AVP_PIPE_BUF_ADDR_STATE::ReferenceFrameBufferBaseAddress:
+       *
+       *     "Format: SplitBaseAddress64ByteAligned"
       */
-      if (isl_surf_usage_is_display(info->usage))
+      if (isl_surf_usage_is_display(info->usage) ||
+          (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT))
         base_alignment_B = MAX(base_alignment_B, 64);
   } else {
      const uint32_t tile_size_B = tile_info->phys_extent_B.width *
@ -3383,6 +3409,14 @@ isl_calc_base_alignment(const struct isl_device *dev,
   if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
      base_alignment_B = MAX(base_alignment_B, 64 * 1024);

+   /* ATS-M PRM Vol 2d, MFX_PIPE_BUF_ADDR_STATE::PostDeblockingDestinationAddress:
+    *
+    *     "Specifies the 4K byte aligned frame buffer address for outputting
+    *      the post-loop filtered reconstructed YUV picture"
+    */
+   if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT)
+      base_alignment_B = MAX(base_alignment_B, 4 * 1024);
+
   return base_alignment_B;
 }

@ -3485,6 +3519,101 @@ isl_surf_init_s(const struct isl_device *dev,
   return true;
 }

+/* Creates total_surf array surfaces whose slices are interleaved in memory.
+ *
+ * Every surface is first laid out on its own, the per-slice sizes are summed
+ * into one shared array pitch, and each surface is then re-created with that
+ * pitch. surfs_offsets[i] receives the byte offset of surfs[i] within one
+ * interleaved slice. Returns false if any isl_surf_init_s() call fails.
+ */
+bool
+isl_surf_init_interleaved_arrays(const struct isl_device *dev,
+                                 uint32_t total_surf,
+                                 struct isl_surf **surfs,
+                                 uint32_t *surfs_offsets,
+                                 const struct isl_surf_init_info *infos)
+{
+   /* Adjusting the array pitch is only supported on GFX 8+ */
+   assert(ISL_GFX_VER(dev) >= 8);
+   assert(total_surf <= ISL_SURF_MAX_INTERLEAVED_ARRAYS);
+
+   /* Do a first pass to gather the uninterleaved surface layouts */
+   bool result = true;
+   struct isl_surf uninterleaved_surfs[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
+   /* Entry 0 is never read: the first surface always sits at offset 0. */
+   uint32_t offset_align_B[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
+   for (uint32_t i = 0; i < total_surf; i++)
+      result &= isl_surf_init_s(dev, &uninterleaved_surfs[i], &infos[i]);
+
+   if (!result)
+      return result;
+
+   /* Compute a single slice pitch by adding up each of the surface's slice
+    * size. Take care to align each surface to its alignment requirement
+    * and align the size of each slice to a full tile.
+    */
+   uint64_t array_pitch_B = 0;
+   for (uint32_t i = 0; i < total_surf; i++) {
+      struct isl_tile_info tile_info;
+      isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
+
+      if (i > 0) {
+         /* Combining surfaces with different alignments, row pitches, or tiling
+          * is not handled properly, as NV12+TileY is the only layout currently
+          * supported by the driver in this type of surface.
+          *
+          * See this commit for a version that doesn't have this restriction:
+          * https://gitlab.freedesktop.org/mesa/mesa/-/commit/3c37183265f11e2ee6bc6d4d95e1580a41673636
+          */
+         assert(uninterleaved_surfs[0].alignment_B == uninterleaved_surfs[i].alignment_B);
+         assert(uninterleaved_surfs[0].row_pitch_B == uninterleaved_surfs[i].row_pitch_B);
+         assert(uninterleaved_surfs[0].tiling == uninterleaved_surfs[i].tiling);
+
+         offset_align_B[i] = uninterleaved_surfs[i].alignment_B;
+
+         /* If it's a multi-planar video coding surface, make sure each offset
+          * is also aligned to a multiple of 16 * row_pitch_B relative to the
+          * first surface.
+          *
+          * SKL PRM Vol 2a, MFX_SURFACE_STATE::YOffsetForUCb:
+          *
+          *     "For PLANAR_420 and PLANAR_422 surface formats, this field
+          *      must be multiple of 16 pixels"
+          */
+         if (uninterleaved_surfs[i].usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) {
+            offset_align_B[i] =
+               isl_lcm_u32(offset_align_B[i],
+                           uninterleaved_surfs[0].row_pitch_B * 16);
+         }
+
+         array_pitch_B = isl_align_npot(array_pitch_B, offset_align_B[i]);
+      }
+
+      /* Widen before multiplying so the slice size is computed in 64 bits
+       * instead of wrapping in 32 bits before it reaches the accumulator.
+       */
+      array_pitch_B +=
+         (uint64_t)uninterleaved_surfs[i].row_pitch_B *
+         align(uninterleaved_surfs[i].array_pitch_el_rows, tile_info.logical_extent_el.h);
+   }
+   /* NOTE(review): array_pitch_B is 64-bit; confirm align() does not truncate
+    * it here (a 64-bit helper such as align64() may be the intended one).
+    */
+   for (uint32_t i = 0; i < total_surf; i++) {
+      array_pitch_B = align(array_pitch_B, uninterleaved_surfs[i].alignment_B);
+   }
+
+   /* Recreate the surfaces using the computed interleaved array pitch. */
+   uint64_t offset = 0;
+   for (uint32_t i = 0; i < total_surf; i++) {
+      struct isl_surf_init_info interleaved_info = infos[i];
+      interleaved_info.array_pitch_B = array_pitch_B;
+
+      result &= isl_surf_init_s(dev, surfs[i], &interleaved_info);
+
+      /* The offsets are derived from the standalone layouts so that they
+       * match the per-slice sizes summed into array_pitch_B above.
+       */
+      struct isl_tile_info tile_info;
+      isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
+
+      if (i > 0) {
+         offset = isl_align_npot(offset, offset_align_B[i]);
+      }
+
+      /* NOTE(review): offset is tracked in 64 bits but stored into a 32-bit
+       * entry; callers presumably keep a single slice below 4 GiB.
+       */
+      surfs_offsets[i] = offset;
+
+      offset += (uint64_t)uninterleaved_surfs[i].row_pitch_B *
+         align(uninterleaved_surfs[i].array_pitch_el_rows,
+               tile_info.logical_extent_el.h);
+   }
+
+   return result;
+}
+
 /* Returns divisor+1 if divisor >= num. */
 static int64_t
 find_next_divisor(int64_t divisor, int64_t num)
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@ -1611,6 +1611,13 @@ struct isl_surf_init_info {
    */
   uint32_t row_pitch_B;

+   /**
+    * Exact value to compute :c:member:`isl_surf.array_pitch_el_rows`. Ignored
+    * if zero. isl_surf_init() will fail if this is misaligned or out of
+    * bounds.
+    */
+   uint64_t array_pitch_B;
+
   isl_surf_usage_flags_t usage;

   /** Flags that alter how ISL selects isl_surf::tiling.  */
@ -2741,6 +2748,27 @@ isl_surf_init_s(const struct isl_device *dev,
                struct isl_surf *surf,
                const struct isl_surf_init_info *restrict info);

+/* Maximum number of interleaved surfaces that can be created using
+ * isl_surf_init_interleaved_arrays(); its total_surf argument must not
+ * exceed this value.
+ */
+#define ISL_SURF_MAX_INTERLEAVED_ARRAYS 3
+
+/* Initializes multiple 2D array surfaces in a layout where the array
+ * slices of the surface are interleaved. The memory ranges of the
+ * resulting surfaces overlap, however the individual slices all occupy
+ * discrete tiles and should not conflict. If the surfaces have video
+ * usage bits set, the offsets of each will also be aligned to 16x the
+ * row pitch of the first surface. All of this is done so that
+ * multi-planar YCbCr array textures can be created with individual
+ * slices that are addressable to the media engine. GFX 8+ only.
+ *
+ * total_surf is the number of entries in surfs, surfs_offsets and infos
+ * and must not exceed ISL_SURF_MAX_INTERLEAVED_ARRAYS. On return,
+ * surfs[i] holds the surface created from infos[i] and surfs_offsets[i]
+ * holds its byte offset within one interleaved slice. Returns false if
+ * any of the surfaces fails to initialize.
+ */
+bool
+isl_surf_init_interleaved_arrays(const struct isl_device *dev,
+                                 uint32_t total_surf,
+                                 struct isl_surf **surfs,
+                                 uint32_t *surfs_offsets,
+                                 const struct isl_surf_init_info *infos);
+
 /* Return the largest surface possible for the specified memory range. */
 void
 isl_surf_from_mem(const struct isl_device *isl_dev,
--- a/src/intel/isl/isl_priv.h
+++ b/src/intel/isl/isl_priv.h
@ -186,6 +186,38 @@ isl_minify(uint32_t n, uint32_t levels)
      return MAX(n >> levels, 1);
 }

+/**
+ * Returns the greatest common divisor of a and b using Stein's (binary GCD)
+ * algorithm.
+ *
+ * At least one of the operands must be non-zero. When exactly one operand is
+ * zero the other operand is returned, following gcd(0, n) == n.
+ */
+static uint32_t
+isl_gcd_u32(uint32_t a, uint32_t b)
+{
+   assert(a > 0 || b > 0);
+
+   /* The shift loops below only terminate on non-zero values, so a zero
+    * operand has to be handled before entering them.
+    */
+   if (a == 0 || b == 0)
+      return a | b;
+
+   /* Strip the factors of two shared by both operands; k counts them. */
+   uint32_t k;
+   for (k = 0; ((a | b) & 1) == 0; ++k) {
+      a >>= 1;
+      b >>= 1;
+   }
+
+   /* Any factor of two left in a is not shared with b, so drop it. */
+   while ((a & 1) == 0)
+      a >>= 1;
+
+   /* a stays odd from here on. */
+   do {
+      while ((b & 1) == 0)
+         b >>= 1;
+
+      /* Keep a <= b so the subtraction below cannot wrap. */
+      if (a > b) {
+         uint32_t tmp = a;
+         a = b;
+         b = tmp;
+      }
+      b = (b - a);
+   } while (b != 0);
+
+   /* Restore the shared power of two. */
+   return a << k;
+}
+
+/**
+ * Returns the least common multiple of a and b.
+ *
+ * a is divided by the greatest common divisor before being multiplied by b.
+ */
+static inline uint32_t
+isl_lcm_u32(uint32_t a, uint32_t b)
+{
+   const uint32_t gcd = isl_gcd_u32(a, b);
+   const uint32_t a_reduced = a / gcd;
+
+   return a_reduced * b;
+}
+
 static inline struct isl_extent3d
 isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa)
 {