isl: Add support for creating layered surfaces for video encode/decode

Adds support for creating layered surfaces whose individual slices are
addressable by the media engine for video encoding and decoding.

Co-authored-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35651>
This commit is contained in:
Calder Young 2025-06-19 21:27:03 -07:00 committed by Marge Bot
parent 3cb77cb144
commit 73608eb8b7
3 changed files with 191 additions and 2 deletions

View file

@ -2402,9 +2402,23 @@ isl_calc_array_pitch_el_rows_gfx4_2d(
switch (array_pitch_span) {
case ISL_ARRAY_PITCH_SPAN_COMPACT:
/* If we have a request for a particular array pitch, inflate the physical
* image size to accommodate that pitch.
*/
if (info->array_pitch_B) {
assert(ISL_GFX_VER(dev) >= 8);
uint32_t tiled_aligned_row_pitch_B =
align((fmtl->bpb / 8) * phys_slice0_sa->w, tile_info->phys_extent_B.width);
assert(info->array_pitch_B % tiled_aligned_row_pitch_B == 0);
pitch_sa_rows = DIV_ROUND_UP(
info->array_pitch_B, tiled_aligned_row_pitch_B);
assert(pitch_sa_rows % image_align_sa->h == 0);
} else {
pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
}
break;
case ISL_ARRAY_PITCH_SPAN_FULL: {
assert(!info->array_pitch_B);
/* The QPitch equation is found in the Broadwell PRM >> Volume 5:
* Memory Views >> Common Surface Formats >> Surface Layout >> 2D
* Surfaces >> Surface Arrays.
@ -3323,8 +3337,20 @@ isl_calc_base_alignment(const struct isl_device *dev,
*
* "For Linear memory, this field specifies the stride in chunks of
* 64 bytes (1 cache line)."
*
* From the ATSM PRM Vol 2d,
* MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress:
*
* "Specifies the 64 byte aligned reference frame buffer addresses"
*
* From the ATSM PRM Vol 2a,
* HCP_PIPE_BUF_ADDR_STATE::ReferencePictureBaseAddress,
* AVP_PIPE_BUF_ADDR_STATE::ReferenceFrameBufferBaseAddress:
*
* "Format: SplitBaseAddress64ByteAligned"
*/
if (isl_surf_usage_is_display(info->usage))
if (isl_surf_usage_is_display(info->usage) ||
(info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT))
base_alignment_B = MAX(base_alignment_B, 64);
} else {
const uint32_t tile_size_B = tile_info->phys_extent_B.width *
@ -3383,6 +3409,14 @@ isl_calc_base_alignment(const struct isl_device *dev,
if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
base_alignment_B = MAX(base_alignment_B, 64 * 1024);
/* ATS-M PRM Vol 2d, MFX_PIPE_BUF_ADDR_STATE::PostDeblockingDestinationAddress:
*
* "Specifies the 4K byte aligned frame buffer address for outputting
* the post-loop filtered reconstructed YUV picture"
*/
if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT)
base_alignment_B = MAX(base_alignment_B, 4 * 1024);
return base_alignment_B;
}
@ -3485,6 +3519,101 @@ isl_surf_init_s(const struct isl_device *dev,
return true;
}
bool
isl_surf_init_interleaved_arrays(const struct isl_device *dev,
uint32_t total_surf,
struct isl_surf **surfs,
uint32_t *surfs_offsets,
const struct isl_surf_init_info *infos)
{
/* Adjusting the array pitch is only supported on GFX 8+ */
assert(ISL_GFX_VER(dev) >= 8);
assert(total_surf <= ISL_SURF_MAX_INTERLEAVED_ARRAYS);
/* Do a first pass to gather uninterleave surface layouts */
bool result = true;
struct isl_surf uninterleaved_surfs[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
uint32_t offset_align_B[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
for (uint32_t i = 0; i < total_surf; i++)
result &= isl_surf_init_s(dev, &uninterleaved_surfs[i], &infos[i]);
if (!result)
return result;
/* Compute a single slice pitch by adding up each of the surface's slice
* size. Take care to align the each surface to its alignment requirement
* and align the size of each slice to a full tile.
*/
uint64_t array_pitch_B = 0;
for (uint32_t i = 0; i < total_surf; i++) {
struct isl_tile_info tile_info;
isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
if (i > 0) {
/* Combining surfaces with different alignments, row pitches, or tiling
* is not handled properly, as NV12+TileY is the only layout currently
* supported by the driver in this type of surface.
*
* See this commit for a version that doesn't have this restriction:
* https://gitlab.freedesktop.org/mesa/mesa/-/commit/3c37183265f11e2ee6bc6d4d95e1580a41673636
*/
assert(uninterleaved_surfs[0].alignment_B == uninterleaved_surfs[i].alignment_B);
assert(uninterleaved_surfs[0].row_pitch_B == uninterleaved_surfs[i].row_pitch_B);
assert(uninterleaved_surfs[0].tiling == uninterleaved_surfs[i].tiling);
offset_align_B[i] = uninterleaved_surfs[i].alignment_B;
/* If its a multi-planar video coding surface, make sure each offset
* is also aligned to a multiple of 16 * row_pitch_B relative to the
* first surface.
*
* SKL PRM Vol 2a, MFX_SURFACE_STATE::YOffsetForUCb:
*
* "For PLANAR_420 and PLANAR_422 surface formats, this field
* must be multiple of 16 pixels"
*/
if (uninterleaved_surfs[i].usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) {
offset_align_B[i] =
isl_lcm_u32(offset_align_B[i],
uninterleaved_surfs[0].row_pitch_B * 16);
}
array_pitch_B = isl_align_npot(array_pitch_B, offset_align_B[i]);
}
array_pitch_B +=
uninterleaved_surfs[i].row_pitch_B *
align(uninterleaved_surfs[i].array_pitch_el_rows, tile_info.logical_extent_el.h);
}
for (uint32_t i = 0; i < total_surf; i++) {
array_pitch_B = align(array_pitch_B, uninterleaved_surfs[i].alignment_B);
}
/* Recreate the surfaces using the computed interleaved array pitch. */
uint64_t offset = 0;
for (uint32_t i = 0; i < total_surf; i++) {
struct isl_surf_init_info interleaved_info = infos[i];
interleaved_info.array_pitch_B = array_pitch_B;
result &= isl_surf_init_s(dev, surfs[i], &interleaved_info);
struct isl_tile_info tile_info;
isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
if (i > 0) {
offset = isl_align_npot(offset, offset_align_B[i]);
}
surfs_offsets[i] = offset;
offset += uninterleaved_surfs[i].row_pitch_B *
align(uninterleaved_surfs[i].array_pitch_el_rows,
tile_info.logical_extent_el.h);
}
return result;
}
/* Returns divisor+1 if divisor >= num. */
static int64_t
find_next_divisor(int64_t divisor, int64_t num)

View file

@ -1611,6 +1611,13 @@ struct isl_surf_init_info {
*/
uint32_t row_pitch_B;
/**
* Exact value to compute :c:member:`isl_surf.array_pitch_el_rows`. Ignored
* if zero. isl_surf_init() will fail if this is misaligned or out of
* bounds.
*/
uint64_t array_pitch_B;
isl_surf_usage_flags_t usage;
/** Flags that alter how ISL selects isl_surf::tiling. */
@ -2741,6 +2748,27 @@ isl_surf_init_s(const struct isl_device *dev,
struct isl_surf *surf,
const struct isl_surf_init_info *restrict info);
/* Maximum number of surfaces that can be interleaved by a single call to
 * isl_surf_init_interleaved_arrays(). That function sizes its per-surface
 * scratch arrays with this value and asserts that the caller's surface
 * count does not exceed it.
 */
#define ISL_SURF_MAX_INTERLEAVED_ARRAYS 3
/* Initializes multiple 2D array surfaces in a layout where the array
 * slices of the surfaces are interleaved. The memory ranges of the
 * resulting surfaces overlap, however the individual slices all occupy
 * discrete tiles and should not conflict. For every surface after the
 * first that has ISL_SURF_USAGE_VIDEO_DECODE_BIT set, the offset is
 * additionally aligned to 16x the row pitch of the first surface. All of
 * this is done so that multi-planar YCbCr array textures can be created
 * with individual slices that are addressable by the media engine.
 * GFX 8+ only (asserted).
 *
 * total_surf is the number of entries in surfs, surfs_offsets and infos and
 * must not exceed ISL_SURF_MAX_INTERLEAVED_ARRAYS (asserted). All surfaces
 * are expected to end up with the same alignment, row pitch and tiling
 * (asserted).
 *
 * On return, each surfs[i] has been initialized from infos[i] with a common
 * array pitch, and surfs_offsets[i] holds the byte offset of that surface's
 * first slice; the first surface is at offset 0.
 *
 * Returns true only if every underlying isl_surf_init_s() call succeeded.
 */
bool
isl_surf_init_interleaved_arrays(const struct isl_device *dev,
uint32_t total_surf,
struct isl_surf **surfs,
uint32_t *surfs_offsets,
const struct isl_surf_init_info *infos);
/* Return the largest surface possible for the specified memory range. */
void
isl_surf_from_mem(const struct isl_device *isl_dev,

View file

@ -186,6 +186,38 @@ isl_minify(uint32_t n, uint32_t levels)
return MAX(n >> levels, 1);
}
/**
 * Returns the greatest common divisor of a and b using Stein's (binary GCD)
 * algorithm.
 *
 * gcd(0, n) and gcd(n, 0) are n. At least one argument is expected to be
 * non-zero (asserted); if both are zero, 0 is returned when asserts are
 * compiled out.
 */
static inline uint32_t
isl_gcd_u32(uint32_t a, uint32_t b)
{
   assert(a > 0 || b > 0);

   /* The shift loops below only terminate for non-zero operands, so a zero
    * operand has to be handled up front: gcd(0, n) == n.
    */
   if (a == 0)
      return b;
   if (b == 0)
      return a;

   /* Strip the power of two shared by both operands; it is restored on
    * return.
    */
   uint32_t k;
   for (k = 0; ((a | b) & 1) == 0; ++k) {
      a >>= 1;
      b >>= 1;
   }

   /* Any remaining factors of two in a are not shared with b. */
   while ((a & 1) == 0)
      a >>= 1;

   /* From here on a is always odd. */
   do {
      /* Factors of two in b cannot be part of the GCD either. */
      while ((b & 1) == 0)
         b >>= 1;

      /* Both are odd now. Keep the smaller one in a so the subtraction
       * below cannot underflow; the difference is even (or zero).
       */
      if (a > b) {
         uint32_t tmp = a;
         a = b;
         b = tmp;
      }

      b -= a;
   } while (b != 0);

   return a << k;
}
/**
 * Returns the least common multiple of a and b.
 *
 * Returns 0 if either argument is zero. The result is expected to fit in
 * 32 bits (asserted); when asserts are compiled out only the low 32 bits
 * are returned.
 */
static inline uint32_t
isl_lcm_u32(uint32_t a, uint32_t b)
{
   /* lcm(0, n) is 0 by convention; returning early also avoids calling the
    * GCD with a zero operand and dividing by a zero GCD.
    */
   if (a == 0 || b == 0)
      return 0;

   /* Divide before multiplying to keep the intermediate small, and do the
    * multiply in 64 bits so an out-of-range result can be detected.
    */
   const uint64_t lcm = (uint64_t)(a / isl_gcd_u32(a, b)) * b;
   assert(lcm <= UINT32_MAX);

   return (uint32_t)lcm;
}
static inline struct isl_extent3d
isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa)
{