anv: support in-loop super resolution for AV1 decoding

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32775>
2026-01-04 02:40:11 +01:00 · 2024-12-23 10:39:17 +09:00 · 2024-12-23 10:39:17 +09:00 · e510efed05
commit e510efed05
parent 788263501d
1 changed files with 137 additions and 22 deletions
--- a/src/intel/vulkan/genX_cmd_video.c
+++ b/src/intel/vulkan/genX_cmd_video.c
@ -1272,6 +1272,20 @@ static const uint32_t av1_gm_trans_prec_diff      = 10;  /* Warp model precision
 static const uint32_t av1_gm_trans_only_prec_diff = 13;  /* Warp model precision bits - 3 */
 static const uint32_t av1_gm_alpha_prec_diff      = 1;   /* Warp model precision bits - gm alpha precision bits */

+static const uint32_t av1_max_mib_size_log2 = 5;  /* superblock-to-MI shift chosen when use_128x128_superblock is set */
+static const uint32_t av1_min_mib_size_log2 = 4;  /* superblock-to-MI shift chosen when use_128x128_superblock is clear */
+static const uint32_t av1_mi_size_log2 = 2;       /* shift from MI units to luma pixels */

+static const uint32_t av1_rs_scale_subpel_bits = 14;     /* fractional bits of the super-res x_step_qn / x0_qn values */
+static const uint32_t av1_rs_scale_subpel_mask = 16383;  /* (1 << av1_rs_scale_subpel_bits) - 1 */
+static const uint32_t av1_rs_scale_extra_off = 128;      /* constant offset added to the initial x0_qn */
+static const uint32_t av1_mfmv_stack_size = 3;           /* largest value assigned to 'total' where mfmv_ref[] is built */

+static int32_t chroma_xstep_qn = 0;        /* chroma x step; set by anv_av1_calculate_xstep_qn() */
+static int32_t luma_xstep_qn = 0;          /* luma x step; set by anv_av1_calculate_xstep_qn() */
+static int32_t chroma_x0_qn[64] = { 0, };  /* chroma x0, written per tile column; NOTE(review): read back with tile_idx, confirm the index */
+static int32_t luma_x0_qn[64] = { 0, };    /* luma x0, written per tile column; NOTE(review): mutable file-scope state shared by every command buffer, confirm this is safe */
+
 static uint32_t
 get_qindex(const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info,
           uint32_t segment_id)
@ -1318,7 +1332,7 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
   ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
   struct anv_video_session *vid = cmd_buffer->video.vid;
   struct anv_video_session_params *params = cmd_buffer->video.params;
-   const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
+   const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
      vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
   const StdVideoDecodeAV1PictureInfo *std_pic_info = av1_pic_info->pStdPictureInfo;
   const StdVideoAV1SequenceHeader *seq_hdr = &params->vk.av1_dec.seq_hdr.base;
@ -1370,6 +1384,8 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
      uint8_t disable_frame_end_update_cdf;
      uint8_t idx;
      uint8_t frame_type;
+      uint32_t frame_width;
+      uint32_t frame_height;
   } ref_info[STD_VIDEO_AV1_NUM_REF_FRAMES] = {};

   const struct anv_image_view *dst_iv =
@ -1381,7 +1397,13 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
   const struct anv_image *dpb_img = dpb_iv->image;
   const bool is_10bit = seq_hdr->pColorConfig->BitDepth == 10;

+   VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
+   int denom = std_pic_info->coded_denom + 9;
+   unsigned downscaled_width = (frameExtent.width * 8 + denom / 2) / denom;
+
   ref_info[AV1_INTRA_FRAME].img = dpb_img;
+   ref_info[AV1_INTRA_FRAME].frame_width = frameExtent.width;
+   ref_info[AV1_INTRA_FRAME].frame_height = frameExtent.height;

   if (dpb_img && frame_info->referenceSlotCount) {
      ref_info[AV1_INTRA_FRAME].order_hint = std_pic_info->OrderHint;
@ -1405,6 +1427,8 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,

            ref_info[i + 1].idx = idx;
            ref_info[i + 1].frame_type = std_ref_info->frame_type;
+            ref_info[i + 1].frame_width = frameExtent.width;
+            ref_info[i + 1].frame_height = frameExtent.height;
            ref_info[i + 1].img = ref_img;
            ref_info[i + 1].order_hint = std_ref_info->OrderHint;
            memcpy(ref_info[i + 1].ref_order_hints, std_ref_info->SavedOrderHints, STD_VIDEO_AV1_NUM_REF_FRAMES);
@ -1932,13 +1956,13 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,

      assert (seq_hdr->flags.enable_order_hint);

-      int total = 2;
+      int total = av1_mfmv_stack_size - 1;

      if (ref_info[AV1_LAST_FRAME].ref_order_hints[AV1_ALTREF_FRAME - AV1_LAST_FRAME + 1] !=
          ref_info[AV1_GOLDEN_FRAME].order_hint) {

         if (!frame_is_key_or_intra(ref_info[0 + 1].frame_type)) {
-            total = 3;
+            total = av1_mfmv_stack_size;
            mfmv_ref[num_mfmv++] = AV1_LAST_FRAME - AV1_LAST_FRAME;
         }
      }
@ -2000,9 +2024,12 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
      frame_lossless &= lossless[i];
   }

-   VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
   anv_batch_emit(&cmd_buffer->batch, GENX(AVP_PIC_STATE), pic) {
-      pic.FrameWidth = frameExtent.width - 1;
+      if (std_pic_info->flags.use_superres) {
+         pic.FrameWidth = downscaled_width - 1;
+      } else {
+         pic.FrameWidth = frameExtent.width - 1;
+      }
      pic.FrameHeight = frameExtent.height - 1;

      if (seq_hdr->pColorConfig->BitDepth == 12)
@ -2136,25 +2163,22 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,

      if (!frame_is_key_or_intra(std_pic_info->frame_type)) {
         for (enum av1_ref_frame r = AV1_INTRA_FRAME; r <= AV1_ALTREF_FRAME; r++) {
-            const struct anv_image *ref_img = ref_info[r].img;

-            if (!ref_img)
-                continue;
+            int ref_width = ref_info[r].frame_width - 1;
+            int ref_height = ref_info[r].frame_height - 1;

-            int ref_width = ref_img->vk.extent.width - 1;
-            int ref_height = ref_img->vk.extent.height - 1;
+            int cur_frame_width = std_pic_info->flags.use_superres ? downscaled_width : frameExtent.width;
+            int cur_frame_height = frameExtent.height;

            uint32_t h_scale_factor =
-               (ref_img->vk.extent.width * av1_scaling_factor + (frameExtent.width >> 1)) /
-               frameExtent.width;
+               ((ref_width + 1) * av1_scaling_factor + (cur_frame_width >> 1)) / cur_frame_width;
            uint32_t v_scale_factor =
-               (ref_img->vk.extent.height * av1_scaling_factor + (frameExtent.height >> 1)) /
-               frameExtent.height;
+               ((ref_height + 1) * av1_scaling_factor + (cur_frame_height >> 1)) / cur_frame_height;

            switch (r) {
            case AV1_INTRA_FRAME:
-               pic.IntraFrameWidthinPixelMinus1 = frameExtent.width - 1;
-               pic.IntraFrameHeightinPixelMinus1 = frameExtent.height - 1;
+               pic.IntraFrameWidthinPixelMinus1 = cur_frame_width - 1;
+               pic.IntraFrameHeightinPixelMinus1 = cur_frame_height - 1;
               pic.VerticalScaleFactorForIntra = av1_scaling_factor;
               pic.HorizontalScaleFactorForIntra = av1_scaling_factor;
               break;
@ -2329,7 +2353,7 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
      fil.CDEFUVStrength6 = cdef_uv_strengths[6];
      fil.CDEFUVStrength7 = cdef_uv_strengths[7];
      fil.SuperResUpscaledFrameWidthMinus1 = frameExtent.width - 1;
-      fil.SuperResDenom = std_pic_info->flags.use_superres ? 0 /*TODO*/ : 8;
+      fil.SuperResDenom = std_pic_info->flags.use_superres ? denom : 8;
      fil.FrameLoopRestorationFilterLumaY = frame_restoration_type[0];
      fil.FrameLoopRestorationFilterChromaU = frame_restoration_type[1];
      fil.FrameLoopRestorationFilterChromaV = frame_restoration_type[2];
@ -2342,10 +2366,11 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
      fil.UseSameLoopRestorationUnitSizeChromasUVFlag = (frame_restoration_type[1] != 0 || frame_restoration_type[2] !=0) ?
         std_pic_info->pLoopRestoration->LoopRestorationSize[0] == std_pic_info->pLoopRestoration->LoopRestorationSize[1] : false;

-      fil.LumaPlanex_step_qn = 0;
-      fil.LumaPlanex0_qn = 0;
-      fil.ChromaPlanex_step_qn = 0;
-      fil.ChromaPlanex0_qn = 0;
+      fil.LumaPlanex_step_qn = luma_xstep_qn;
+      fil.LumaPlanex0_qn = luma_x0_qn[tile_idx];
+      fil.ChromaPlanex_step_qn = chroma_xstep_qn;
+      fil.ChromaPlanex0_qn = chroma_x0_qn[tile_idx];
+
   };

   unsigned column = tile_idx % std_pic_info->pTileInfo->TileCols;
@ -2394,13 +2419,103 @@ anv_av1_decode_video_tile(struct anv_cmd_buffer *cmd_buffer,
   }
 }

+/**
+ * Precompute the super-resolution upscaling parameters for the current frame.
+ *
+ * Stores the per-plane step in luma_xstep_qn / chroma_xstep_qn and the start
+ * position of every tile column in luma_x0_qn[] / chroma_x0_qn[].  These are
+ * the values anv_av1_decode_video_tile() later copies into
+ * fil.LumaPlanex_step_qn, fil.LumaPlanex0_qn, fil.ChromaPlanex_step_qn and
+ * fil.ChromaPlanex0_qn.  Everything is zeroed when the frame does not use
+ * super resolution.
+ *
+ * \param cmd_buffer  command buffer whose bound video session parameters
+ *                    supply the AV1 sequence header
+ * \param frame_info  decode info; a VkVideoDecodeAV1PictureInfoKHR must be
+ *                    chained in pNext (it is dereferenced unconditionally)
+ */
+static void
+anv_av1_calculate_xstep_qn(struct anv_cmd_buffer *cmd_buffer,
+                           const VkVideoDecodeInfoKHR *frame_info)
+{
+   const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
+      vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);

+   const StdVideoDecodeAV1PictureInfo *std_pic_info = av1_pic_info->pStdPictureInfo;
+   struct anv_video_session_params *params = cmd_buffer->video.params;
+   const StdVideoAV1SequenceHeader *seq_hdr = &params->vk.av1_dec.seq_hdr.base;
+   VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
+   unsigned tile_cols = std_pic_info->pTileInfo->TileCols;

+   if (!std_pic_info->flags.use_superres) {
+      luma_xstep_qn = chroma_xstep_qn = 0;
+      memset(luma_x0_qn, 0, sizeof(luma_x0_qn));
+      memset(chroma_x0_qn, 0, sizeof(chroma_x0_qn));

+      return;
+   }

+   /* One x0 is stored per tile column; never write past the tables. */
+   assert(tile_cols <= sizeof(luma_x0_qn) / sizeof(luma_x0_qn[0]));

+   const int32_t mib_size_log2 = seq_hdr->flags.use_128x128_superblock ?
+      av1_max_mib_size_log2 : av1_min_mib_size_log2;
+   const int subsampling_x = seq_hdr->pColorConfig->subsampling_x;

+   /* frameExtent is the upscaled size (it is what gets programmed as
+    * SuperResUpscaledFrameWidthMinus1); the downscaled width is
+    * upscaled * 8 / denom, rounded to nearest.
+    */
+   const int denom = std_pic_info->coded_denom + 9;
+   const unsigned downscaled_width = (frameExtent.width * 8 + denom / 2) / denom;

+   /* Width of the downscaled frame in MI units, used to clamp tile ends.
+    * Tile positions below are converted to the same unit before clamping.
+    */
+   const int32_t mi_cols = ALIGN(downscaled_width, 8) >> av1_mi_size_log2;

+   for (uint8_t i = 0; i < 2; i++) { /* i == 0 : luma, i == 1 : chroma */
+      const int ssx = i & subsampling_x;
+      const int downscaled = ALIGN(downscaled_width, 2) >> ssx;
+      const int upscaled = ALIGN(frameExtent.width, 2) >> ssx;

+      /* Source step per output pixel, in fixed point with
+       * av1_rs_scale_subpel_bits fractional bits, rounded to nearest.
+       */
+      const int xstep_qn = ((downscaled << av1_rs_scale_subpel_bits) + upscaled / 2) / upscaled;

+      if (i == 0)
+         luma_xstep_qn = xstep_qn;
+      else
+         chroma_xstep_qn = xstep_qn;

+      /* err is the total drift over one row caused by rounding xstep_qn;
+       * half of it is taken out of the starting position.  The offset constant
+       * is cast so the whole expression stays in signed arithmetic.
+       */
+      int32_t err = upscaled * xstep_qn - (downscaled << av1_rs_scale_subpel_bits);
+      int32_t x0 = (-((upscaled - downscaled) << (av1_rs_scale_subpel_bits - 1)) + upscaled / 2) /
+         upscaled + (int32_t)av1_rs_scale_extra_off - err / 2;

+      x0 = (int32_t)(x0 & av1_rs_scale_subpel_mask);

+      for (unsigned j = 0; j < tile_cols; j++) {
+         const bool last_col = (j == tile_cols - 1);
+         int32_t tile_col_end_sb;

+         if (i == 0)
+            luma_x0_qn[j] = x0;
+         else
+            chroma_x0_qn[j] = x0;

+         /* End of this tile column.  pMiColStarts[] is treated as superblock
+          * units here, consistently with downscaled_x0 below.
+          */
+         if (!last_col) {
+            tile_col_end_sb = std_pic_info->pTileInfo->pMiColStarts[j + 1];
+         } else {
+            /* start + width; the stored width is "minus 1" */
+            tile_col_end_sb = std_pic_info->pTileInfo->pMiColStarts[tile_cols - 1] +
+                              std_pic_info->pTileInfo->pWidthInSbsMinus1[tile_cols - 1] + 1;
+         }

+         /* Superblocks -> MI units (same conversion as the tile start),
+          * clamped to the frame width.
+          */
+         int32_t mi_col_end = tile_col_end_sb << mib_size_log2;
+         mi_col_end = MIN2(mi_col_end, mi_cols);

+         /* MI units -> pixels of this plane. */
+         int32_t downscaled_x1 = mi_col_end << (av1_mi_size_log2 - ssx);
+         int32_t downscaled_x0 = std_pic_info->pTileInfo->pMiColStarts[j] << mib_size_log2 << (av1_mi_size_log2 - ssx);

+         int32_t src_w = downscaled_x1 - downscaled_x0;
+         int32_t upscaled_x0 = (downscaled_x0 * denom) / 8;
+         int32_t upscaled_x1;

+         /* The last column always ends at the plane width; the truncating
+          * division could otherwise fall short of it.
+          */
+         if (last_col)
+            upscaled_x1 = upscaled;
+         else
+            upscaled_x1 = (downscaled_x1 * denom) / 8;

+         int32_t dst_w = upscaled_x1 - upscaled_x0;

+         /* Carry the sub-pixel phase over to the next tile column. */
+         x0 += (dst_w * xstep_qn) - (src_w << av1_rs_scale_subpel_bits);
+      }

+   }
+}
+
 static void
 anv_av1_decode_video(struct anv_cmd_buffer *cmd_buffer,
                     const VkVideoDecodeInfoKHR *frame_info)
 {
-   const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
+   const VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);

+   anv_av1_calculate_xstep_qn(cmd_buffer, frame_info); /* once per frame: fills the x_step_qn / x0_qn state read by every tile below */

    for (unsigned t = 0; t < av1_pic_info->tileCount; t++)
       anv_av1_decode_video_tile(cmd_buffer, frame_info, t);
 }