anv: support in-loop super resolution for AV1 decoding

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32775>
This commit is contained in:
Hyunjun Ko 2024-12-23 10:39:17 +09:00 committed by Marge Bot
parent 788263501d
commit e510efed05

View file

@ -1272,6 +1272,20 @@ static const uint32_t av1_gm_trans_prec_diff = 10; /* Warp model precision
static const uint32_t av1_gm_trans_only_prec_diff = 13; /* Warp model precision bits - 3 */
static const uint32_t av1_gm_alpha_prec_diff = 1; /* Warp model precision bits - gm alpha precision bits */
/* log2 of the superblock width in mode-info (MI) units; the max value is
 * picked when use_128x128_superblock is set, the min value otherwise.
 */
static const uint32_t av1_max_mib_size_log2 = 5;
static const uint32_t av1_min_mib_size_log2 = 4;
/* log2 of the MI unit width in luma pixels (shift used to go from MI units to pixels). */
static const uint32_t av1_mi_size_log2 = 2;
/* Number of fractional bits of the fixed-point superres x-step / x0 values. */
static const uint32_t av1_rs_scale_subpel_bits = 14;
/* (1 << av1_rs_scale_subpel_bits) - 1: keeps only the fractional bits of x0. */
static const uint32_t av1_rs_scale_subpel_mask = 16383;
/* Constant bias added when the initial superres offset x0 is derived. */
static const uint32_t av1_rs_scale_extra_off = 128;
/* Largest value assigned to the "total" candidate count in the MFMV reference selection. */
static const uint32_t av1_mfmv_stack_size = 3;
/* Superres upscaling state shared between anv_av1_calculate_xstep_qn() (writer)
 * and anv_av1_decode_video_tile() (reader).
 *
 * NOTE(review): these are mutable file-scope statics, so they are shared by
 * every command buffer in the process; recording AV1 decodes from several
 * threads at once looks racy - confirm.
 */
static int32_t chroma_xstep_qn = 0;
static int32_t luma_xstep_qn = 0;
/* Initial x offsets, written one entry per tile column.
 *
 * NOTE(review): the reader indexes these with tile_idx rather than with the
 * tile column, which differs as soon as there is more than one tile row and
 * can exceed 64 - confirm.
 */
static int32_t chroma_x0_qn[64] = { 0, };
static int32_t luma_x0_qn[64] = { 0, };
static uint32_t
get_qindex(const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info,
uint32_t segment_id)
@ -1318,7 +1332,7 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
struct anv_video_session *vid = cmd_buffer->video.vid;
struct anv_video_session_params *params = cmd_buffer->video.params;
const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
const StdVideoDecodeAV1PictureInfo *std_pic_info = av1_pic_info->pStdPictureInfo;
const StdVideoAV1SequenceHeader *seq_hdr = &params->vk.av1_dec.seq_hdr.base;
@ -1370,6 +1384,8 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
uint8_t disable_frame_end_update_cdf;
uint8_t idx;
uint8_t frame_type;
uint32_t frame_width;
uint32_t frame_height;
} ref_info[STD_VIDEO_AV1_NUM_REF_FRAMES] = {};
const struct anv_image_view *dst_iv =
@ -1381,7 +1397,13 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *dpb_img = dpb_iv->image;
const bool is_10bit = seq_hdr->pColorConfig->BitDepth == 10;
VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
int denom = std_pic_info->coded_denom + 9;
unsigned downscaled_width = (frameExtent.width * 8 + denom / 2) / denom;
ref_info[AV1_INTRA_FRAME].img = dpb_img;
ref_info[AV1_INTRA_FRAME].frame_width = frameExtent.width;
ref_info[AV1_INTRA_FRAME].frame_height = frameExtent.height;
if (dpb_img && frame_info->referenceSlotCount) {
ref_info[AV1_INTRA_FRAME].order_hint = std_pic_info->OrderHint;
@ -1405,6 +1427,8 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
ref_info[i + 1].idx = idx;
ref_info[i + 1].frame_type = std_ref_info->frame_type;
ref_info[i + 1].frame_width = frameExtent.width;
ref_info[i + 1].frame_height = frameExtent.height;
ref_info[i + 1].img = ref_img;
ref_info[i + 1].order_hint = std_ref_info->OrderHint;
memcpy(ref_info[i + 1].ref_order_hints, std_ref_info->SavedOrderHints, STD_VIDEO_AV1_NUM_REF_FRAMES);
@ -1932,13 +1956,13 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
assert (seq_hdr->flags.enable_order_hint);
int total = 2;
int total = av1_mfmv_stack_size - 1;
if (ref_info[AV1_LAST_FRAME].ref_order_hints[AV1_ALTREF_FRAME - AV1_LAST_FRAME + 1] !=
ref_info[AV1_GOLDEN_FRAME].order_hint) {
if (!frame_is_key_or_intra(ref_info[0 + 1].frame_type)) {
total = 3;
total = av1_mfmv_stack_size;
mfmv_ref[num_mfmv++] = AV1_LAST_FRAME - AV1_LAST_FRAME;
}
}
@ -2000,9 +2024,12 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
frame_lossless &= lossless[i];
}
VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
anv_batch_emit(&cmd_buffer->batch, GENX(AVP_PIC_STATE), pic) {
pic.FrameWidth = frameExtent.width - 1;
if (std_pic_info->flags.use_superres) {
pic.FrameWidth = downscaled_width - 1;
} else {
pic.FrameWidth = frameExtent.width - 1;
}
pic.FrameHeight = frameExtent.height - 1;
if (seq_hdr->pColorConfig->BitDepth == 12)
@ -2136,25 +2163,22 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
if (!frame_is_key_or_intra(std_pic_info->frame_type)) {
for (enum av1_ref_frame r = AV1_INTRA_FRAME; r <= AV1_ALTREF_FRAME; r++) {
const struct anv_image *ref_img = ref_info[r].img;
if (!ref_img)
continue;
int ref_width = ref_info[r].frame_width - 1;
int ref_height = ref_info[r].frame_height - 1;
int ref_width = ref_img->vk.extent.width - 1;
int ref_height = ref_img->vk.extent.height - 1;
int cur_frame_width = std_pic_info->flags.use_superres ? downscaled_width : frameExtent.width;
int cur_frame_height = frameExtent.height;
uint32_t h_scale_factor =
(ref_img->vk.extent.width * av1_scaling_factor + (frameExtent.width >> 1)) /
frameExtent.width;
((ref_width + 1) * av1_scaling_factor + (cur_frame_width >> 1)) / cur_frame_width;
uint32_t v_scale_factor =
(ref_img->vk.extent.height * av1_scaling_factor + (frameExtent.height >> 1)) /
frameExtent.height;
((ref_height + 1) * av1_scaling_factor + (cur_frame_height >> 1)) / cur_frame_height;
switch (r) {
case AV1_INTRA_FRAME:
pic.IntraFrameWidthinPixelMinus1 = frameExtent.width - 1;
pic.IntraFrameHeightinPixelMinus1 = frameExtent.height - 1;
pic.IntraFrameWidthinPixelMinus1 = cur_frame_width - 1;
pic.IntraFrameHeightinPixelMinus1 = cur_frame_height - 1;
pic.VerticalScaleFactorForIntra = av1_scaling_factor;
pic.HorizontalScaleFactorForIntra = av1_scaling_factor;
break;
@ -2329,7 +2353,7 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
fil.CDEFUVStrength6 = cdef_uv_strengths[6];
fil.CDEFUVStrength7 = cdef_uv_strengths[7];
fil.SuperResUpscaledFrameWidthMinus1 = frameExtent.width - 1;
fil.SuperResDenom = std_pic_info->flags.use_superres ? 0 /*TODO*/ : 8;
fil.SuperResDenom = std_pic_info->flags.use_superres ? denom : 8;
fil.FrameLoopRestorationFilterLumaY = frame_restoration_type[0];
fil.FrameLoopRestorationFilterChromaU = frame_restoration_type[1];
fil.FrameLoopRestorationFilterChromaV = frame_restoration_type[2];
@ -2342,10 +2366,11 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
fil.UseSameLoopRestorationUnitSizeChromasUVFlag = (frame_restoration_type[1] != 0 || frame_restoration_type[2] !=0) ?
std_pic_info->pLoopRestoration->LoopRestorationSize[0] == std_pic_info->pLoopRestoration->LoopRestorationSize[1] : false;
fil.LumaPlanex_step_qn = 0;
fil.LumaPlanex0_qn = 0;
fil.ChromaPlanex_step_qn = 0;
fil.ChromaPlanex0_qn = 0;
fil.LumaPlanex_step_qn = luma_xstep_qn;
fil.LumaPlanex0_qn = luma_x0_qn[tile_idx];
fil.ChromaPlanex_step_qn = chroma_xstep_qn;
fil.ChromaPlanex0_qn = chroma_x0_qn[tile_idx];
};
unsigned column = tile_idx % std_pic_info->pTileInfo->TileCols;
@ -2394,13 +2419,103 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
}
}
/**
 * Precompute the super-resolution upscaler parameters for the current frame.
 *
 * The results are stored in the file-scope luma_xstep_qn / chroma_xstep_qn
 * (horizontal step, fixed point with av1_rs_scale_subpel_bits fractional
 * bits) and luma_x0_qn[] / chroma_x0_qn[] (initial offset, one entry per tile
 * column). When the picture does not use superres, all of them are reset to
 * zero.
 *
 * \param cmd_buffer  command buffer whose bound video session parameters
 *                    provide the AV1 sequence header
 * \param frame_info  decode info carrying the AV1 picture info in its pNext
 *                    chain and the coded (upscaled) extent of the picture
 */
static void
anv_av1_calculate_xstep_qn(struct anv_cmd_buffer *cmd_buffer,
                           const VkVideoDecodeInfoKHR *frame_info)
{
   const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
      vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
   const StdVideoDecodeAV1PictureInfo *std_pic_info = av1_pic_info->pStdPictureInfo;
   struct anv_video_session_params *params = cmd_buffer->video.params;
   const StdVideoAV1SequenceHeader *seq_hdr = &params->vk.av1_dec.seq_hdr.base;
   VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
   unsigned tile_cols = std_pic_info->pTileInfo->TileCols;

   if (!std_pic_info->flags.use_superres) {
      luma_xstep_qn = chroma_xstep_qn = 0;
      memset(luma_x0_qn, 0, sizeof(luma_x0_qn));
      memset(chroma_x0_qn, 0, sizeof(chroma_x0_qn));
      return;
   }

   /* The x0 tables hold exactly one entry per tile column. */
   assert(tile_cols <= sizeof(luma_x0_qn) / sizeof(luma_x0_qn[0]));

   int32_t mib_size_log2 = seq_hdr->flags.use_128x128_superblock ?
                           av1_max_mib_size_log2 : av1_min_mib_size_log2;

   /* Frame width in MI (4 pixel) units, with the width rounded up to 8 pixels.
    * This must use the MI shift, not the superblock shift, because it is
    * compared against mi_col_end below.
    *
    * NOTE(review): this is derived from the upscaled width; the AV1 spec
    * derives MiCols from the downscaled width - confirm which one is intended.
    */
   int32_t mi_cols = ALIGN(frameExtent.width, 8) >> av1_mi_size_log2;

   int denom = std_pic_info->coded_denom + 9;
   unsigned downscaled_width = (frameExtent.width * 8 + denom / 2) / denom;

   for (uint8_t i = 0; i < 2; i++) { /* i == 0 : luma, i == 1 : chroma */
      int subsampling_x = seq_hdr->pColorConfig->subsampling_x;
      /* Horizontal subsampling only applies to the chroma pass. */
      int ssx = i & subsampling_x;
      int downscaled = ALIGN(downscaled_width, 2) >> ssx;
      int upscaled = ALIGN(frameExtent.width, 2) >> ssx;

      /* Rounded fixed-point ratio downscaled / upscaled. */
      int xstep_qn = ((downscaled << av1_rs_scale_subpel_bits) + upscaled / 2) / upscaled;

      if (i == 0)
         luma_xstep_qn = xstep_qn;
      else
         chroma_xstep_qn = xstep_qn;

      /* Rounding error accumulated over the whole row by the step above. */
      int32_t err = upscaled * xstep_qn - (downscaled << av1_rs_scale_subpel_bits);
      int32_t x0 = (-((upscaled - downscaled) << (av1_rs_scale_subpel_bits - 1)) + upscaled / 2) /
                   upscaled + av1_rs_scale_extra_off - err / 2;
      x0 = (int32_t)(x0 & av1_rs_scale_subpel_mask);

      for (unsigned j = 0; j < tile_cols; j++) {
         int32_t tile_col_end_sb;
         bool last_col = (j == tile_cols - 1);

         if (i == 0)
            luma_x0_qn[j] = x0;
         else
            chroma_x0_qn[j] = x0;

         /* pMiColStarts is treated as superblock units here, consistently
          * with the downscaled_x0 computation below.
          */
         if (!last_col) {
            tile_col_end_sb = std_pic_info->pTileInfo->pMiColStarts[j + 1];
         } else {
            tile_col_end_sb = std_pic_info->pTileInfo->pMiColStarts[tile_cols - 1] +
                              std_pic_info->pTileInfo->pWidthInSbsMinus1[tile_cols - 1];
         }

         /* Superblock units -> MI units, so this is a left shift just like
          * the one applied to the tile start below; then clamp to the frame.
          */
         int32_t mi_col_end = tile_col_end_sb << mib_size_log2;
         mi_col_end = MIN2(mi_col_end, mi_cols);

         /* Tile column bounds in (possibly subsampled) downscaled pixels. */
         int32_t downscaled_x1 = mi_col_end << (av1_mi_size_log2 - ssx);
         int32_t downscaled_x0 = std_pic_info->pTileInfo->pMiColStarts[j] << mib_size_log2 << (av1_mi_size_log2 - ssx);
         int32_t src_w = downscaled_x1 - downscaled_x0;

         /* Matching bounds in upscaled pixels; the last column always ends at
          * the upscaled frame edge.
          */
         int32_t upscaled_x0 = (downscaled_x0 * denom) / 8;
         int32_t upscaled_x1;
         if (last_col) {
            upscaled_x1 = upscaled;
         } else {
            upscaled_x1 = (downscaled_x1 * denom) / 8;
         }
         int32_t dst_w = upscaled_x1 - upscaled_x0;

         /* Carry the fractional position over to the next tile column. The
          * value computed after the last column is never consumed.
          */
         x0 += (dst_w * xstep_qn) - (src_w << av1_rs_scale_subpel_bits);
      }
   }
}
/* Decode one AV1 picture: refresh the superres x-step / x0 values once for the
 * frame, then run anv_av1_decode_video_tile() for every tile index below the
 * tileCount of the AV1 picture info.
 */
static void
anv_av1_decode_video(struct anv_cmd_buffer *cmd_buffer,
const VkVideoDecodeInfoKHR *frame_info)
{
const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
/* Must run before the per-tile loop: each tile reads the values computed here. */
anv_av1_calculate_xstep_qn(cmd_buffer, frame_info);
for (unsigned t = 0; t < av1_pic_info->tileCount; t++)
anv_av1_decode_video_tile(cmd_buffer, frame_info, t);
}