mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 17:40:11 +01:00
isl: Add support for creating layered surfaces for video encode/decode
Adds support for creating layered surfaces with slices that are addressable to the media engine for video encoding and decoding. Co-authored-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35651>
This commit is contained in:
parent
3cb77cb144
commit
73608eb8b7
3 changed files with 191 additions and 2 deletions
|
|
@ -2402,9 +2402,23 @@ isl_calc_array_pitch_el_rows_gfx4_2d(
|
||||||
|
|
||||||
switch (array_pitch_span) {
|
switch (array_pitch_span) {
|
||||||
case ISL_ARRAY_PITCH_SPAN_COMPACT:
|
case ISL_ARRAY_PITCH_SPAN_COMPACT:
|
||||||
pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
|
/* If we have a request for a particular array pitch, inflate the physical
|
||||||
|
* image size to accommodate that pitch.
|
||||||
|
*/
|
||||||
|
if (info->array_pitch_B) {
|
||||||
|
assert(ISL_GFX_VER(dev) >= 8);
|
||||||
|
uint32_t tiled_aligned_row_pitch_B =
|
||||||
|
align((fmtl->bpb / 8) * phys_slice0_sa->w, tile_info->phys_extent_B.width);
|
||||||
|
assert(info->array_pitch_B % tiled_aligned_row_pitch_B == 0);
|
||||||
|
pitch_sa_rows = DIV_ROUND_UP(
|
||||||
|
info->array_pitch_B, tiled_aligned_row_pitch_B);
|
||||||
|
assert(pitch_sa_rows % image_align_sa->h == 0);
|
||||||
|
} else {
|
||||||
|
pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case ISL_ARRAY_PITCH_SPAN_FULL: {
|
case ISL_ARRAY_PITCH_SPAN_FULL: {
|
||||||
|
assert(!info->array_pitch_B);
|
||||||
/* The QPitch equation is found in the Broadwell PRM >> Volume 5:
|
/* The QPitch equation is found in the Broadwell PRM >> Volume 5:
|
||||||
* Memory Views >> Common Surface Formats >> Surface Layout >> 2D
|
* Memory Views >> Common Surface Formats >> Surface Layout >> 2D
|
||||||
* Surfaces >> Surface Arrays.
|
* Surfaces >> Surface Arrays.
|
||||||
|
|
@ -3323,8 +3337,20 @@ isl_calc_base_alignment(const struct isl_device *dev,
|
||||||
*
|
*
|
||||||
* "For Linear memory, this field specifies the stride in chunks of
|
* "For Linear memory, this field specifies the stride in chunks of
|
||||||
* 64 bytes (1 cache line)."
|
* 64 bytes (1 cache line)."
|
||||||
|
*
|
||||||
|
* From the ATSM PRM Vol 2d,
|
||||||
|
* MFX_REFERENCE_PICTURE_BASE_ADDR::MFXReferencePictureAddress:
|
||||||
|
*
|
||||||
|
* "Specifies the 64 byte aligned reference frame buffer addresses"
|
||||||
|
*
|
||||||
|
* From the ATSM PRM Vol 2a,
|
||||||
|
* HCP_PIPE_BUF_ADDR_STATE::ReferencePictureBaseAddress,
|
||||||
|
* AVP_PIPE_BUF_ADDR_STATE::ReferenceFrameBufferBaseAddress:
|
||||||
|
*
|
||||||
|
* "Format: SplitBaseAddress64ByteAligned"
|
||||||
*/
|
*/
|
||||||
if (isl_surf_usage_is_display(info->usage))
|
if (isl_surf_usage_is_display(info->usage) ||
|
||||||
|
(info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT))
|
||||||
base_alignment_B = MAX(base_alignment_B, 64);
|
base_alignment_B = MAX(base_alignment_B, 64);
|
||||||
} else {
|
} else {
|
||||||
const uint32_t tile_size_B = tile_info->phys_extent_B.width *
|
const uint32_t tile_size_B = tile_info->phys_extent_B.width *
|
||||||
|
|
@ -3383,6 +3409,14 @@ isl_calc_base_alignment(const struct isl_device *dev,
|
||||||
if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
|
if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
|
||||||
base_alignment_B = MAX(base_alignment_B, 64 * 1024);
|
base_alignment_B = MAX(base_alignment_B, 64 * 1024);
|
||||||
|
|
||||||
|
/* ATS-M PRM Vol 2d, MFX_PIPE_BUF_ADDR_STATE::PostDeblockingDestinationAddress:
|
||||||
|
*
|
||||||
|
* "Specifies the 4K byte aligned frame buffer address for outputting
|
||||||
|
* the post-loop filtered reconstructed YUV picture"
|
||||||
|
*/
|
||||||
|
if (info->usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT)
|
||||||
|
base_alignment_B = MAX(base_alignment_B, 4 * 1024);
|
||||||
|
|
||||||
return base_alignment_B;
|
return base_alignment_B;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3485,6 +3519,101 @@ isl_surf_init_s(const struct isl_device *dev,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
isl_surf_init_interleaved_arrays(const struct isl_device *dev,
|
||||||
|
uint32_t total_surf,
|
||||||
|
struct isl_surf **surfs,
|
||||||
|
uint32_t *surfs_offsets,
|
||||||
|
const struct isl_surf_init_info *infos)
|
||||||
|
{
|
||||||
|
/* Adjusting the array pitch is only supported on GFX 8+ */
|
||||||
|
assert(ISL_GFX_VER(dev) >= 8);
|
||||||
|
assert(total_surf <= ISL_SURF_MAX_INTERLEAVED_ARRAYS);
|
||||||
|
|
||||||
|
/* Do a first pass to gather uninterleave surface layouts */
|
||||||
|
bool result = true;
|
||||||
|
struct isl_surf uninterleaved_surfs[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
|
||||||
|
uint32_t offset_align_B[ISL_SURF_MAX_INTERLEAVED_ARRAYS];
|
||||||
|
for (uint32_t i = 0; i < total_surf; i++)
|
||||||
|
result &= isl_surf_init_s(dev, &uninterleaved_surfs[i], &infos[i]);
|
||||||
|
|
||||||
|
if (!result)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
/* Compute a single slice pitch by adding up each of the surface's slice
|
||||||
|
* size. Take care to align the each surface to its alignment requirement
|
||||||
|
* and align the size of each slice to a full tile.
|
||||||
|
*/
|
||||||
|
uint64_t array_pitch_B = 0;
|
||||||
|
for (uint32_t i = 0; i < total_surf; i++) {
|
||||||
|
struct isl_tile_info tile_info;
|
||||||
|
isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
|
||||||
|
|
||||||
|
if (i > 0) {
|
||||||
|
/* Combining surfaces with different alignments, row pitches, or tiling
|
||||||
|
* is not handled properly, as NV12+TileY is the only layout currently
|
||||||
|
* supported by the driver in this type of surface.
|
||||||
|
*
|
||||||
|
* See this commit for a version that doesn't have this restriction:
|
||||||
|
* https://gitlab.freedesktop.org/mesa/mesa/-/commit/3c37183265f11e2ee6bc6d4d95e1580a41673636
|
||||||
|
*/
|
||||||
|
assert(uninterleaved_surfs[0].alignment_B == uninterleaved_surfs[i].alignment_B);
|
||||||
|
assert(uninterleaved_surfs[0].row_pitch_B == uninterleaved_surfs[i].row_pitch_B);
|
||||||
|
assert(uninterleaved_surfs[0].tiling == uninterleaved_surfs[i].tiling);
|
||||||
|
|
||||||
|
offset_align_B[i] = uninterleaved_surfs[i].alignment_B;
|
||||||
|
|
||||||
|
/* If its a multi-planar video coding surface, make sure each offset
|
||||||
|
* is also aligned to a multiple of 16 * row_pitch_B relative to the
|
||||||
|
* first surface.
|
||||||
|
*
|
||||||
|
* SKL PRM Vol 2a, MFX_SURFACE_STATE::YOffsetForUCb:
|
||||||
|
*
|
||||||
|
* "For PLANAR_420 and PLANAR_422 surface formats, this field
|
||||||
|
* must be multiple of 16 pixels"
|
||||||
|
*/
|
||||||
|
if (uninterleaved_surfs[i].usage & ISL_SURF_USAGE_VIDEO_DECODE_BIT) {
|
||||||
|
offset_align_B[i] =
|
||||||
|
isl_lcm_u32(offset_align_B[i],
|
||||||
|
uninterleaved_surfs[0].row_pitch_B * 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
array_pitch_B = isl_align_npot(array_pitch_B, offset_align_B[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
array_pitch_B +=
|
||||||
|
uninterleaved_surfs[i].row_pitch_B *
|
||||||
|
align(uninterleaved_surfs[i].array_pitch_el_rows, tile_info.logical_extent_el.h);
|
||||||
|
}
|
||||||
|
for (uint32_t i = 0; i < total_surf; i++) {
|
||||||
|
array_pitch_B = align(array_pitch_B, uninterleaved_surfs[i].alignment_B);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Recreate the surfaces using the computed interleaved array pitch. */
|
||||||
|
uint64_t offset = 0;
|
||||||
|
for (uint32_t i = 0; i < total_surf; i++) {
|
||||||
|
struct isl_surf_init_info interleaved_info = infos[i];
|
||||||
|
interleaved_info.array_pitch_B = array_pitch_B;
|
||||||
|
|
||||||
|
result &= isl_surf_init_s(dev, surfs[i], &interleaved_info);
|
||||||
|
|
||||||
|
struct isl_tile_info tile_info;
|
||||||
|
isl_surf_get_tile_info(&uninterleaved_surfs[i], &tile_info);
|
||||||
|
|
||||||
|
if (i > 0) {
|
||||||
|
offset = isl_align_npot(offset, offset_align_B[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
surfs_offsets[i] = offset;
|
||||||
|
|
||||||
|
offset += uninterleaved_surfs[i].row_pitch_B *
|
||||||
|
align(uninterleaved_surfs[i].array_pitch_el_rows,
|
||||||
|
tile_info.logical_extent_el.h);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/* Returns divisor+1 if divisor >= num. */
|
/* Returns divisor+1 if divisor >= num. */
|
||||||
static int64_t
|
static int64_t
|
||||||
find_next_divisor(int64_t divisor, int64_t num)
|
find_next_divisor(int64_t divisor, int64_t num)
|
||||||
|
|
|
||||||
|
|
@ -1611,6 +1611,13 @@ struct isl_surf_init_info {
|
||||||
*/
|
*/
|
||||||
uint32_t row_pitch_B;
|
uint32_t row_pitch_B;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Exact value to compute :c:member:`isl_surf.array_pitch_el_rows`. Ignored
|
||||||
|
* if zero. isl_surf_init() will fail if this is misaligned or out of
|
||||||
|
* bounds.
|
||||||
|
*/
|
||||||
|
uint64_t array_pitch_B;
|
||||||
|
|
||||||
isl_surf_usage_flags_t usage;
|
isl_surf_usage_flags_t usage;
|
||||||
|
|
||||||
/** Flags that alter how ISL selects isl_surf::tiling. */
|
/** Flags that alter how ISL selects isl_surf::tiling. */
|
||||||
|
|
@ -2741,6 +2748,27 @@ isl_surf_init_s(const struct isl_device *dev,
|
||||||
struct isl_surf *surf,
|
struct isl_surf *surf,
|
||||||
const struct isl_surf_init_info *restrict info);
|
const struct isl_surf_init_info *restrict info);
|
||||||
|
|
||||||
|
/* Maximum number of interleaved surfaces that can be created using
|
||||||
|
* isl_surf_init_interleaved_arrays
|
||||||
|
*/
|
||||||
|
#define ISL_SURF_MAX_INTERLEAVED_ARRAYS 3
|
||||||
|
|
||||||
|
/* Initializes multiple 2D array surfaces in a layout where the array
|
||||||
|
* slices of the surface are interleaved. The memory ranges of the
|
||||||
|
* resulting surfaces overlap, however the individual slices all occupy
|
||||||
|
* discrete tiles and should not conflict. If the surfaces have video
|
||||||
|
* usage bits set, the offsets of each will also be aligned to 16x the
|
||||||
|
* row pitch of the first surface. All of this is done so that
|
||||||
|
* multi-planar YCbCr array textures can be created with individual
|
||||||
|
* slices that are addressable to the media engine. GFX 8+ only.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
isl_surf_init_interleaved_arrays(const struct isl_device *dev,
|
||||||
|
uint32_t total_surf,
|
||||||
|
struct isl_surf **surfs,
|
||||||
|
uint32_t *surfs_offsets,
|
||||||
|
const struct isl_surf_init_info *infos);
|
||||||
|
|
||||||
/* Return the largest surface possible for the specified memory range. */
|
/* Return the largest surface possible for the specified memory range. */
|
||||||
void
|
void
|
||||||
isl_surf_from_mem(const struct isl_device *isl_dev,
|
isl_surf_from_mem(const struct isl_device *isl_dev,
|
||||||
|
|
|
||||||
|
|
@ -186,6 +186,38 @@ isl_minify(uint32_t n, uint32_t levels)
|
||||||
return MAX(n >> levels, 1);
|
return MAX(n >> levels, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the greatest common divisor of a and b using Stein's (binary GCD)
 * algorithm.
 *
 * At least one of a and b must be non-zero. If exactly one of them is zero,
 * the other one is returned.
 */
static inline uint32_t
isl_gcd_u32(uint32_t a, uint32_t b)
{
   assert(a > 0 || b > 0);

   /* The shifting loops below only terminate for non-zero operands, so
    * handle gcd(0, x) == x up front.
    */
   if (a == 0)
      return b;
   if (b == 0)
      return a;

   /* Strip the power of two shared by both operands; it is restored in the
    * final shift.
    */
   uint32_t k;
   for (k = 0; ((a | b) & 1) == 0; ++k) {
      a >>= 1;
      b >>= 1;
   }

   /* Any remaining factor of two in a is not shared with b. */
   while ((a & 1) == 0)
      a >>= 1;

   /* From here on a is always odd. */
   do {
      while ((b & 1) == 0)
         b >>= 1;

      /* Keep a <= b so the subtraction below cannot underflow. */
      if (a > b) {
         uint32_t tmp = a;
         a = b;
         b = tmp;
      }

      b -= a;
   } while (b != 0);

   return a << k;
}
|
||||||
|
|
||||||
|
/**
 * Returns the least common multiple of a and b.
 *
 * The result must fit in 32 bits; this is asserted.
 */
static inline uint32_t
isl_lcm_u32(uint32_t a, uint32_t b)
{
   /* Divide before multiplying to keep the intermediate small, and form the
    * product in 64 bits so that a result too large for uint32_t is caught
    * instead of silently wrapping.
    */
   const uint64_t lcm = (uint64_t)(a / isl_gcd_u32(a, b)) * b;
   assert(lcm <= UINT32_MAX);
   return (uint32_t)lcm;
}
|
||||||
|
|
||||||
static inline struct isl_extent3d
|
static inline struct isl_extent3d
|
||||||
isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa)
|
isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue