diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index a73b54f08b1..0d6b2f08c2d 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -2402,9 +2402,23 @@ isl_calc_array_pitch_el_rows_gfx4_2d( switch (array_pitch_span) { case ISL_ARRAY_PITCH_SPAN_COMPACT: - pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + /* If we have a request for a particular array pitch, inflate the physical + * image size to accomodate that pitch. + */ + if (info->array_pitch_B) { + assert(ISL_GFX_VER(dev) >= 8); + uint32_t tiled_aligned_row_pitch_B = + align((fmtl->bpb / 8) * phys_slice0_sa->w, tile_info->phys_extent_B.width); + assert(info->array_pitch_B % tiled_aligned_row_pitch_B == 0); + pitch_sa_rows = DIV_ROUND_UP( + info->array_pitch_B, tiled_aligned_row_pitch_B); + assert(pitch_sa_rows % image_align_sa->h == 0); + } else { + pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h); + } break; case ISL_ARRAY_PITCH_SPAN_FULL: { + assert(!info->array_pitch_B); /* The QPitch equation is found in the Broadwell PRM >> Volume 5: * Memory Views >> Common Surface Formats >> Surface Layout >> 2D * Surfaces >> Surface Arrays. @@ -3323,8 +3337,20 @@ isl_calc_base_alignment(const struct isl_device *dev, * * "For Linear memory, this field specifies the stride in chunks of * 64 bytes (1 cache line)." 
+ * + * From the ATSM PRM Vol 2d, + * MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress: + * + * "Specifies the 64 byte aligned reference frame buffer addresses" + * + * From the ATSM PRM Vol 2a, + * HCP_PIPE_BUF_ADDR_STATE::ReferencePictureBaseAddress, + * AVP_PIPE_BUF_ADDR_STATE::ReferenceFrameBufferBaseAddress: + * + * "Format: SplitBaseAddress64ByteAligned" */ - if (isl_surf_usage_is_display(info->usage)) + if (isl_surf_usage_is_display(info->usage) || + (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT)) base_alignment_B = MAX(base_alignment_B, 64); } else { const uint32_t tile_size_B = tile_info->phys_extent_B.width * @@ -3383,6 +3409,14 @@ isl_calc_base_alignment(const struct isl_device *dev, if (info->usage & ISL_SURF_USAGE_SPARSE_BIT) base_alignment_B = MAX(base_alignment_B, 64 * 1024); + /* ATS-M PRM Vol 2d, MFX_PIPE_BUF_ADDR_STATE::PostDeblockingDestinationAddress: + * + * "Specifies the 4K byte aligned frame buffer address for outputting + * the post-loop filtered reconstructed YUV picture" + */ + if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) + base_alignment_B = MAX(base_alignment_B, 4 * 1024); + return base_alignment_B; } @@ -3485,6 +3519,101 @@ isl_surf_init_s(const struct isl_device *dev, return true; } +bool +isl_surf_init_interleaved_arrays(const struct isl_device *dev, + uint32_t total_surf, + struct isl_surf **surfs, + uint32_t *surfs_offsets, + const struct isl_surf_init_info *infos) +{ + /* Adjusting the array pitch is only supported on GFX 8+ */ + assert(ISL_GFX_VER(dev) >= 8); + assert(total_surf <= ISL_SURF_MAX_INTERLEAVED_ARRAYS); + + /* Do a first pass to gather uninterleave surface layouts */ + bool result = true; + struct isl_surf uninterleaved_surfs[ISL_SURF_MAX_INTERLEAVED_ARRAYS]; + uint32_t offset_align_B[ISL_SURF_MAX_INTERLEAVED_ARRAYS]; + for (uint32_t i = 0; i < total_surf; i++) + result &= isl_surf_init_s(dev, &uninterleaved_surfs[i], &infos[i]); + + if (!result) + return result; + + /* Compute a single slice pitch 
by adding up each of the surface's slice + * size. Take care to align the each surface to its alignment requirement + * and align the size of each slice to a full tile. + */ + uint64_t array_pitch_B = 0; + for (uint32_t i = 0; i < total_surf; i++) { + struct isl_tile_info tile_info; + isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info); + + if (i > 0) { + /* Combining surfaces with different alignments, row pitches, or tiling + * is not handled properly, as NV12+TileY is the only layout currently + * supported by the driver in this type of surface. + * + * See this commit for a version that doesn't have this restriction: + * https://gitlab.freedesktop.org/mesa/mesa/-/commit/3c37183265f11e2ee6bc6d4d95e1580a41673636 + */ + assert(uninterleaved_surfs[0].alignment_B == uninterleaved_surfs[i].alignment_B); + assert(uninterleaved_surfs[0].row_pitch_B == uninterleaved_surfs[i].row_pitch_B); + assert(uninterleaved_surfs[0].tiling == uninterleaved_surfs[i].tiling); + + offset_align_B[i] = uninterleaved_surfs[i].alignment_B; + + /* If its a multi-planar video coding surface, make sure each offset + * is also aligned to a multiple of 16 * row_pitch_B relative to the + * first surface. + * + * SKL PRM Vol 2a, MFX_SURFACE_STATE::YOffsetForUCb: + * + * "For PLANAR_420 and PLANAR_422 surface formats, this field + * must be multiple of 16 pixels" + */ + if (uninterleaved_surfs[i].usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) { + offset_align_B[i] = + isl_lcm_u32(offset_align_B[i], + uninterleaved_surfs[0].row_pitch_B * 16); + } + + array_pitch_B = isl_align_npot(array_pitch_B, offset_align_B[i]); + } + + array_pitch_B += + uninterleaved_surfs[i].row_pitch_B * + align(uninterleaved_surfs[i].array_pitch_el_rows, tile_info.logical_extent_el.h); + } + for (uint32_t i = 0; i < total_surf; i++) { + array_pitch_B = align(array_pitch_B, uninterleaved_surfs[i].alignment_B); + } + + /* Recreate the surfaces using the computed interleaved array pitch. 
*/ + uint64_t offset = 0; + for (uint32_t i = 0; i < total_surf; i++) { + struct isl_surf_init_info interleaved_info = infos[i]; + interleaved_info.array_pitch_B = array_pitch_B; + + result &= isl_surf_init_s(dev, surfs[i], &interleaved_info); + + struct isl_tile_info tile_info; + isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info); + + if (i > 0) { + offset = isl_align_npot(offset, offset_align_B[i]); + } + + surfs_offsets[i] = offset; + + offset += uninterleaved_surfs[i].row_pitch_B * + align(uninterleaved_surfs[i].array_pitch_el_rows, + tile_info.logical_extent_el.h); + } + + return result; +} + /* Returns divisor+1 if divisor >= num. */ static int64_t find_next_divisor(int64_t divisor, int64_t num) diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 7947839562c..2c09ae52a5a 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1611,6 +1611,13 @@ struct isl_surf_init_info { */ uint32_t row_pitch_B; + /** + * Exact value to compute :c:member:`isl_surf.array_pitch_el_rows`. Ignored + * if zero. isl_surf_init() will fail if this is misaligned or out of + * bounds. + */ + uint64_t array_pitch_B; + isl_surf_usage_flags_t usage; /** Flags that alter how ISL selects isl_surf::tiling. */ @@ -2741,6 +2748,27 @@ isl_surf_init_s(const struct isl_device *dev, struct isl_surf *surf, const struct isl_surf_init_info *restrict info); +/* Maximum number of interleaved surfaces that can be created using + * isl_surf_init_interleaved_arrays + */ +#define ISL_SURF_MAX_INTERLEAVED_ARRAYS 3 + +/* Initializes multiple 2D array surfaces in a layout where the array + * slices of the surface are interleaved. The memory ranges of the + * resulting surfaces overlap, however the individual slices all occupy + * discrete tiles and should not conflict. If the surfaces have video + * usage bits set, the offsets of each will also be aligned to 16x the + * row pitch of the first surface. 
/**
 * Returns the greatest common divisor of a and b using Stein's (binary GCD)
 * algorithm.
 *
 * If exactly one operand is zero, the other operand is returned, following
 * gcd(x, 0) == x. gcd(0, 0) is undefined: it asserts in debug builds and
 * returns 0 otherwise.
 */
static inline uint32_t
isl_gcd_u32(uint32_t a, uint32_t b)
{
   assert(a > 0 || b > 0);

   /* The shift loops below only terminate on non-zero values, so zero
    * operands have to be resolved up front.
    */
   if (a == 0)
      return b;
   if (b == 0)
      return a;

   /* Strip the power of two shared by both operands; it is multiplied back
    * into the result at the end.
    */
   uint32_t k = 0;
   while (((a | b) & 1) == 0) {
      a >>= 1;
      b >>= 1;
      k++;
   }

   /* Any remaining factor of two in a is not shared with b. */
   while ((a & 1) == 0)
      a >>= 1;

   /* From here on a is always odd. */
   do {
      while ((b & 1) == 0)
         b >>= 1;

      /* Keep a <= b so that the subtraction cannot wrap. */
      if (a > b) {
         uint32_t tmp = a;
         a = b;
         b = tmp;
      }

      b -= a;
   } while (b != 0);

   return a << k;
}

/**
 * Returns the least common multiple of a and b, or 0 if either operand is
 * zero.
 *
 * The mathematical result has to fit in 32 bits. This is asserted in debug
 * builds; otherwise the low 32 bits of the product are returned.
 */
static inline uint32_t
isl_lcm_u32(uint32_t a, uint32_t b)
{
   if (a == 0 || b == 0)
      return 0;

   /* Divide first to keep the intermediate value as small as possible. */
   const uint64_t lcm = (uint64_t)(a / isl_gcd_u32(a, b)) * b;
   assert(lcm <= UINT32_MAX);

   return (uint32_t)lcm;
}