mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 13:20:14 +01:00
isl: Add support for creating layered surfaces for video encode/decode
Adds support for creating layered surfaces with slices that are addressable to the media engine for video encoding and decoding. Co-authored-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35651>
This commit is contained in:
parent
3cb77cb144
commit
73608eb8b7
3 changed files with 191 additions and 2 deletions
|
|
@ -2402,9 +2402,23 @@ isl_calc_array_pitch_el_rows_gfx4_2d(
|
|||
|
||||
switch (array_pitch_span) {
|
||||
case ISL_ARRAY_PITCH_SPAN_COMPACT:
|
||||
pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
|
||||
/* If we have a request for a particular array pitch, inflate the physical
|
||||
* image size to accommodate that pitch.
|
||||
*/
|
||||
if (info->array_pitch_B) {
|
||||
assert(ISL_GFX_VER(dev) >= 8);
|
||||
uint32_t tiled_aligned_row_pitch_B =
|
||||
align((fmtl->bpb / 8) * phys_slice0_sa->w, tile_info->phys_extent_B.width);
|
||||
assert(info->array_pitch_B % tiled_aligned_row_pitch_B == 0);
|
||||
pitch_sa_rows = DIV_ROUND_UP(
|
||||
info->array_pitch_B, tiled_aligned_row_pitch_B);
|
||||
assert(pitch_sa_rows % image_align_sa->h == 0);
|
||||
} else {
|
||||
pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
|
||||
}
|
||||
break;
|
||||
case ISL_ARRAY_PITCH_SPAN_FULL: {
|
||||
assert(!info->array_pitch_B);
|
||||
/* The QPitch equation is found in the Broadwell PRM >> Volume 5:
|
||||
* Memory Views >> Common Surface Formats >> Surface Layout >> 2D
|
||||
* Surfaces >> Surface Arrays.
|
||||
|
|
@ -3323,8 +3337,20 @@ isl_calc_base_alignment(const struct isl_device *dev,
|
|||
*
|
||||
* "For Linear memory, this field specifies the stride in chunks of
|
||||
* 64 bytes (1 cache line)."
|
||||
*
|
||||
* From the ATSM PRM Vol 2d,
|
||||
* MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress:
|
||||
*
|
||||
* "Specifies the 64 byte aligned reference frame buffer addresses"
|
||||
*
|
||||
* From the ATSM PRM Vol 2a,
|
||||
* HCP_PIPE_BUF_ADDR_STATE::ReferencePictureBaseAddress,
|
||||
* AVP_PIPE_BUF_ADDR_STATE::ReferenceFrameBufferBaseAddress:
|
||||
*
|
||||
* "Format: SplitBaseAddress64ByteAligned"
|
||||
*/
|
||||
if (isl_surf_usage_is_display(info->usage))
|
||||
if (isl_surf_usage_is_display(info->usage) ||
|
||||
(info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT))
|
||||
base_alignment_B = MAX(base_alignment_B, 64);
|
||||
} else {
|
||||
const uint32_t tile_size_B = tile_info->phys_extent_B.width *
|
||||
|
|
@ -3383,6 +3409,14 @@ isl_calc_base_alignment(const struct isl_device *dev,
|
|||
if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
|
||||
base_alignment_B = MAX(base_alignment_B, 64 * 1024);
|
||||
|
||||
/* ATS-M PRM Vol 2d, MFX_PIPE_BUF_ADDR_STATE::PostDeblockingDestinationAddress:
|
||||
*
|
||||
* "Specifies the 4K byte aligned frame buffer address for outputting
|
||||
* the post-loop filtered reconstructed YUV picture"
|
||||
*/
|
||||
if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT)
|
||||
base_alignment_B = MAX(base_alignment_B, 4 * 1024);
|
||||
|
||||
return base_alignment_B;
|
||||
}
|
||||
|
||||
|
|
@ -3485,6 +3519,101 @@ isl_surf_init_s(const struct isl_device *dev,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
isl_surf_init_interleaved_arrays(const struct isl_device *dev,
|
||||
uint32_t total_surf,
|
||||
struct isl_surf **surfs,
|
||||
uint32_t *surfs_offsets,
|
||||
const struct isl_surf_init_info *infos)
|
||||
{
|
||||
/* Adjusting the array pitch is only supported on GFX 8+ */
|
||||
assert(ISL_GFX_VER(dev) >= 8);
|
||||
assert(total_surf <= ISL_SURF_MAX_INTERLEAVED_ARRAYS);
|
||||
|
||||
/* Do a first pass to gather uninterleave surface layouts */
|
||||
bool result = true;
|
||||
struct isl_surf uninterleaved_surfs[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
|
||||
uint32_t offset_align_B[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
|
||||
for (uint32_t i = 0; i < total_surf; i++)
|
||||
result &= isl_surf_init_s(dev, &uninterleaved_surfs[i], &infos[i]);
|
||||
|
||||
if (!result)
|
||||
return result;
|
||||
|
||||
/* Compute a single slice pitch by adding up each of the surface's slice
|
||||
* size. Take care to align the each surface to its alignment requirement
|
||||
* and align the size of each slice to a full tile.
|
||||
*/
|
||||
uint64_t array_pitch_B = 0;
|
||||
for (uint32_t i = 0; i < total_surf; i++) {
|
||||
struct isl_tile_info tile_info;
|
||||
isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
|
||||
|
||||
if (i > 0) {
|
||||
/* Combining surfaces with different alignments, row pitches, or tiling
|
||||
* is not handled properly, as NV12+TileY is the only layout currently
|
||||
* supported by the driver in this type of surface.
|
||||
*
|
||||
* See this commit for a version that doesn't have this restriction:
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/commit/3c37183265f11e2ee6bc6d4d95e1580a41673636
|
||||
*/
|
||||
assert(uninterleaved_surfs[0].alignment_B == uninterleaved_surfs[i].alignment_B);
|
||||
assert(uninterleaved_surfs[0].row_pitch_B == uninterleaved_surfs[i].row_pitch_B);
|
||||
assert(uninterleaved_surfs[0].tiling == uninterleaved_surfs[i].tiling);
|
||||
|
||||
offset_align_B[i] = uninterleaved_surfs[i].alignment_B;
|
||||
|
||||
/* If its a multi-planar video coding surface, make sure each offset
|
||||
* is also aligned to a multiple of 16 * row_pitch_B relative to the
|
||||
* first surface.
|
||||
*
|
||||
* SKL PRM Vol 2a, MFX_SURFACE_STATE::YOffsetForUCb:
|
||||
*
|
||||
* "For PLANAR_420 and PLANAR_422 surface formats, this field
|
||||
* must be multiple of 16 pixels"
|
||||
*/
|
||||
if (uninterleaved_surfs[i].usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) {
|
||||
offset_align_B[i] =
|
||||
isl_lcm_u32(offset_align_B[i],
|
||||
uninterleaved_surfs[0].row_pitch_B * 16);
|
||||
}
|
||||
|
||||
array_pitch_B = isl_align_npot(array_pitch_B, offset_align_B[i]);
|
||||
}
|
||||
|
||||
array_pitch_B +=
|
||||
uninterleaved_surfs[i].row_pitch_B *
|
||||
align(uninterleaved_surfs[i].array_pitch_el_rows, tile_info.logical_extent_el.h);
|
||||
}
|
||||
for (uint32_t i = 0; i < total_surf; i++) {
|
||||
array_pitch_B = align(array_pitch_B, uninterleaved_surfs[i].alignment_B);
|
||||
}
|
||||
|
||||
/* Recreate the surfaces using the computed interleaved array pitch. */
|
||||
uint64_t offset = 0;
|
||||
for (uint32_t i = 0; i < total_surf; i++) {
|
||||
struct isl_surf_init_info interleaved_info = infos[i];
|
||||
interleaved_info.array_pitch_B = array_pitch_B;
|
||||
|
||||
result &= isl_surf_init_s(dev, surfs[i], &interleaved_info);
|
||||
|
||||
struct isl_tile_info tile_info;
|
||||
isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
|
||||
|
||||
if (i > 0) {
|
||||
offset = isl_align_npot(offset, offset_align_B[i]);
|
||||
}
|
||||
|
||||
surfs_offsets[i] = offset;
|
||||
|
||||
offset += uninterleaved_surfs[i].row_pitch_B *
|
||||
align(uninterleaved_surfs[i].array_pitch_el_rows,
|
||||
tile_info.logical_extent_el.h);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Returns divisor+1 if divisor >= num. */
|
||||
static int64_t
|
||||
find_next_divisor(int64_t divisor, int64_t num)
|
||||
|
|
|
|||
|
|
@ -1611,6 +1611,13 @@ struct isl_surf_init_info {
|
|||
*/
|
||||
uint32_t row_pitch_B;
|
||||
|
||||
/**
|
||||
* Exact value to compute :c:member:`isl_surf.array_pitch_el_rows`. Ignored
|
||||
* if zero. isl_surf_init() will fail if this is misaligned or out of
|
||||
* bounds.
|
||||
*/
|
||||
uint64_t array_pitch_B;
|
||||
|
||||
isl_surf_usage_flags_t usage;
|
||||
|
||||
/** Flags that alter how ISL selects isl_surf::tiling. */
|
||||
|
|
@ -2741,6 +2748,27 @@ isl_surf_init_s(const struct isl_device *dev,
|
|||
struct isl_surf *surf,
|
||||
const struct isl_surf_init_info *restrict info);
|
||||
|
||||
/* Maximum number of interleaved surfaces that can be created using
|
||||
* isl_surf_init_interleaved_arrays
|
||||
*/
|
||||
#define ISL_SURF_MAX_INTERLEAVED_ARRAYS 3
|
||||
|
||||
/* Initializes multiple 2D array surfaces in a layout where the array
|
||||
* slices of the surface are interleaved. The memory ranges of the
|
||||
* resulting surfaces overlap, however the individual slices all occupy
|
||||
* discrete tiles and should not conflict. If the surfaces have video
|
||||
* usage bits set, the offsets of each will also be aligned to 16x the
|
||||
* row pitch of the first surface. All of this is done so that
|
||||
* multi-planar YCbCr array textures can be created with individual
|
||||
* slices that are addressable to the media engine. GFX 8+ only.
|
||||
*/
|
||||
bool
|
||||
isl_surf_init_interleaved_arrays(const struct isl_device *dev,
|
||||
uint32_t total_surf,
|
||||
struct isl_surf **surfs,
|
||||
uint32_t *surfs_offsets,
|
||||
const struct isl_surf_init_info *infos);
|
||||
|
||||
/* Return the largest surface possible for the specified memory range. */
|
||||
void
|
||||
isl_surf_from_mem(const struct isl_device *isl_dev,
|
||||
|
|
|
|||
|
|
@ -186,6 +186,38 @@ isl_minify(uint32_t n, uint32_t levels)
|
|||
return MAX(n >> levels, 1);
|
||||
}
|
||||
|
||||
/**
 * Returns the greatest common divisor of a and b using Stein's (binary GCD)
 * algorithm.
 *
 * At least one operand is expected to be non-zero. If exactly one operand is
 * zero the other operand is returned, since gcd(0, n) == n.
 */
static uint32_t
isl_gcd_u32(uint32_t a, uint32_t b)
{
   assert(a > 0 || b > 0);

   /* The shift loops below strip trailing zero bits and only terminate on a
    * non-zero value, so a zero operand has to be handled up front.
    */
   if (a == 0)
      return b;
   if (b == 0)
      return a;

   /* Factor out the powers of two common to both operands. */
   uint32_t k;
   for (k = 0; ((a | b) & 1) == 0; ++k) {
      a >>= 1;
      b >>= 1;
   }

   /* Any remaining factor of two in a is not shared with b. */
   while ((a & 1) == 0)
      a >>= 1;

   /* From here on a is always odd. */
   do {
      while ((b & 1) == 0)
         b >>= 1;

      /* Keep a <= b so the subtraction below cannot underflow. */
      if (a > b) {
         uint32_t tmp = a;
         a = b;
         b = tmp;
      }

      b = (b - a);
   } while (b != 0);

   /* Restore the common power-of-two factor. */
   return a << k;
}
|
||||
|
||||
/**
 * Returns the least common multiple of a and b, or 0 if either operand is
 * zero.
 *
 * The result is expected to fit in 32 bits; this is asserted.
 */
static inline uint32_t
isl_lcm_u32(uint32_t a, uint32_t b)
{
   /* Zero has no positive multiple. Returning early also keeps a zero
    * operand away from the GCD helper and the division below.
    */
   if (a == 0 || b == 0)
      return 0;

   /* Divide first to keep the intermediate small, then multiply in 64 bits
    * so that an overflowing result is detected instead of silently wrapping.
    */
   const uint64_t lcm = (uint64_t)(a / isl_gcd_u32(a, b)) * b;
   assert(lcm <= UINT32_MAX);

   return (uint32_t)lcm;
}
|
||||
|
||||
static inline struct isl_extent3d
|
||||
isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue