From cc14724d73b2ec03720c1f7366d7697e50bd9f16 Mon Sep 17 00:00:00 2001
From: David Rosca
Date: Fri, 2 Aug 2024 15:50:53 +0200
Subject: [PATCH] frontends/va: Implement DPB management for H264/5 encode

In VAAPI applications already need to do DPB management, but for each
picture to encode we only get the reconstructed surfaces needed to
encode this picture instead of entire DPB. Drivers need to know the
current size and layout of DPB, so keep track of entire DPB contents
in frontend.
This allows drivers to directly get the DPB layout as used by
application instead of trying to re-build it from limited information
they have available (frame numbers), which only works in the basic cases
and otherwise will de-sync from application (driver may remove pictures
from DPB that the application still want to use).

Reviewed-by: Ruijing Dong
Reviewed-By: Sil Vilerino
Part-of:
---
Review notes (below the "---" marker, so not part of the commit message):
 - picture_hevc_enc.c: the "no free DPB slot" check after the slot search
   loop compared against ARRAY_SIZE(context->desc.h264enc.dpb) (33 entries)
   although the loop is bounded by the h265enc.dpb array (16 entries).
   With a full HEVC DPB the check never triggered and dpb[16] was written
   out of bounds. It now checks the h265enc.dpb array.
 - p_video_state.h: the tags of the two new DPB entry structs were swapped
   (the entry carrying frame_idx, which only the H264 handler fills in, was
   named pipe_h265_enc_dpb_entry). Each picture desc now uses the entry
   type of its own codec; field layout is unchanged.
 - Leading whitespace of context lines was reconstructed and may need to be
   re-synced against the tree before applying with strict whitespace.

 src/gallium/frontends/va/picture.c          |  2 +-
 src/gallium/frontends/va/picture_h264_enc.c | 41 ++++++++++++++++++
 src/gallium/frontends/va/picture_hevc_enc.c | 41 +++++++++++++++++-
 src/gallium/frontends/va/surface.c          | 30 ++++++++++++++
 src/gallium/frontends/va/va_private.h       |  2 +
 src/gallium/include/pipe/p_video_state.h    | 46 +++++++++++++++++----
 6 files changed, 152 insertions(+), 10 deletions(-)

diff --git a/src/gallium/frontends/va/picture.c b/src/gallium/frontends/va/picture.c
index 92c62969e5a..e808dd6a276 100644
--- a/src/gallium/frontends/va/picture.c
+++ b/src/gallium/frontends/va/picture.c
@@ -38,7 +38,7 @@
 
 #include "va_private.h"
 
-static void
+void
 vlVaSetSurfaceContext(vlVaDriver *drv, vlVaSurface *surf, vlVaContext *context)
 {
    if (surf->ctx == context)
diff --git a/src/gallium/frontends/va/picture_h264_enc.c b/src/gallium/frontends/va/picture_h264_enc.c
index eddc6fddf72..fbd5241aaf6 100644
--- a/src/gallium/frontends/va/picture_h264_enc.c
+++ b/src/gallium/frontends/va/picture_h264_enc.c
@@ -42,6 +42,8 @@ vlVaHandleVAEncPictureParameterBufferTypeH264(vlVaDriver *drv, vlVaContext *cont
 {
    VAEncPictureParameterBufferH264 *h264;
    vlVaBuffer *coded_buf;
+   vlVaSurface *surf;
+   unsigned i;
 
    h264 = buf->data;
    if (h264->pic_fields.bits.idr_pic_flag == 1)
@@ -56,6 +58,31 @@ vlVaHandleVAEncPictureParameterBufferTypeH264(vlVaDriver *drv, vlVaContext *cont
    else if (context->desc.h264enc.frame_num == 1)
       context->desc.h264enc.i_remain--;
 
+   surf = handle_table_get(drv->htab, h264->CurrPic.picture_id);
+   if (!surf)
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+   for (i = 0; i < ARRAY_SIZE(context->desc.h264enc.dpb); i++) {
+      if (context->desc.h264enc.dpb[i].id == h264->CurrPic.picture_id) {
+         assert(surf->is_dpb);
+         break;
+      }
+      if (!context->desc.h264enc.dpb[i].id) {
+         assert(!surf->is_dpb);
+         surf->is_dpb = true;
+         vlVaSetSurfaceContext(drv, surf, context);
+         context->desc.h264enc.dpb_size++;
+         break;
+      }
+   }
+   if (i == ARRAY_SIZE(context->desc.h264enc.dpb))
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+   context->desc.h264enc.dpb_curr_pic = i;
+   context->desc.h264enc.dpb[i].id = h264->CurrPic.picture_id;
+   context->desc.h264enc.dpb[i].frame_idx = h264->CurrPic.frame_idx;
+   context->desc.h264enc.dpb[i].pic_order_cnt = h264->CurrPic.TopFieldOrderCnt;
+   context->desc.h264enc.dpb[i].is_ltr = h264->CurrPic.flags & VA_PICTURE_H264_LONG_TERM_REFERENCE;
+
    context->desc.h264enc.p_remain = context->desc.h264enc.gop_size - context->desc.h264enc.gop_cnt - context->desc.h264enc.i_remain;
 
    coded_buf = handle_table_get(drv->htab, h264->coded_buf);
@@ -104,6 +131,16 @@ vlVaHandleVAEncPictureParameterBufferTypeH264(vlVaDriver *drv, vlVaContext *cont
    return VA_STATUS_SUCCESS;
 }
 
+static uint8_t
+vlVaDpbIndex(vlVaContext *context, VASurfaceID id)
+{
+   for (uint8_t i = 0; i < context->desc.h264enc.dpb_size; i++) {
+      if (context->desc.h264enc.dpb[i].id == id)
+         return i;
+   }
+   return PIPE_H2645_LIST_REF_INVALID_ENTRY;
+}
+
 VAStatus
 vlVaHandleVAEncSliceParameterBufferTypeH264(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
 {
@@ -113,6 +150,8 @@ vlVaHandleVAEncSliceParameterBufferTypeH264(vlVaDriver *drv, vlVaContext *contex
    h264 = buf->data;
    memset(&context->desc.h264enc.ref_idx_l0_list, VA_INVALID_ID, sizeof(context->desc.h264enc.ref_idx_l0_list));
    memset(&context->desc.h264enc.ref_idx_l1_list, VA_INVALID_ID, sizeof(context->desc.h264enc.ref_idx_l1_list));
+   memset(&context->desc.h264enc.ref_list0, PIPE_H2645_LIST_REF_INVALID_ENTRY, sizeof(context->desc.h264enc.ref_list0));
+   memset(&context->desc.h264enc.ref_list1, PIPE_H2645_LIST_REF_INVALID_ENTRY, sizeof(context->desc.h264enc.ref_list1));
 
    if(h264->num_ref_idx_active_override_flag) {
       context->desc.h264enc.num_ref_idx_l0_active_minus1 = h264->num_ref_idx_l0_active_minus1;
@@ -121,12 +160,14 @@ vlVaHandleVAEncSliceParameterBufferTypeH264(vlVaDriver *drv, vlVaContext *contex
 
    for (int i = 0; i < 32; i++) {
       if (h264->RefPicList0[i].picture_id != VA_INVALID_ID) {
+         context->desc.h264enc.ref_list0[i] = vlVaDpbIndex(context, h264->RefPicList0[i].picture_id);
          context->desc.h264enc.ref_idx_l0_list[i] = PTR_TO_UINT(util_hash_table_get(context->desc.h264enc.frame_idx,
                                UINT_TO_PTR(h264->RefPicList0[i].picture_id + 1)));
          context->desc.h264enc.l0_is_long_term[i] = h264->RefPicList0[i].flags &
                                VA_PICTURE_H264_LONG_TERM_REFERENCE;
       }
       if (h264->RefPicList1[i].picture_id != VA_INVALID_ID && h264->slice_type == 1) {
+         context->desc.h264enc.ref_list1[i] = vlVaDpbIndex(context, h264->RefPicList1[i].picture_id);
          context->desc.h264enc.ref_idx_l1_list[i] = PTR_TO_UINT(util_hash_table_get(context->desc.h264enc.frame_idx,
                                UINT_TO_PTR(h264->RefPicList1[i].picture_id + 1)));
          context->desc.h264enc.l1_is_long_term[i] = h264->RefPicList1[i].flags &
diff --git a/src/gallium/frontends/va/picture_hevc_enc.c b/src/gallium/frontends/va/picture_hevc_enc.c
index 13f6b4edfab..43c2b26d6d0 100644
--- a/src/gallium/frontends/va/picture_hevc_enc.c
+++ b/src/gallium/frontends/va/picture_hevc_enc.c
@@ -49,6 +49,7 @@ vlVaHandleVAEncPictureParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext *cont
 {
    VAEncPictureParameterBufferHEVC *h265;
    vlVaBuffer *coded_buf;
+   vlVaSurface *surf;
    int i;
 
    h265 = buf->data;
@@ -58,6 +59,30 @@ vlVaHandleVAEncPictureParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext *cont
    for (i = 0; i < 15; i++)
       context->desc.h265enc.reference_frames[i] = h265->reference_frames[i].picture_id;
 
+   surf = handle_table_get(drv->htab, h265->decoded_curr_pic.picture_id);
+   if (!surf)
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+   for (i = 0; i < ARRAY_SIZE(context->desc.h265enc.dpb); i++) {
+      if (context->desc.h265enc.dpb[i].id == h265->decoded_curr_pic.picture_id) {
+         assert(surf->is_dpb);
+         break;
+      }
+      if (!context->desc.h265enc.dpb[i].id) {
+         assert(!surf->is_dpb);
+         surf->is_dpb = true;
+         vlVaSetSurfaceContext(drv, surf, context);
+         context->desc.h265enc.dpb_size++;
+         break;
+      }
+   }
+   if (i == ARRAY_SIZE(context->desc.h265enc.dpb))
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+   context->desc.h265enc.dpb_curr_pic = i;
+   context->desc.h265enc.dpb[i].id = h265->decoded_curr_pic.picture_id;
+   context->desc.h265enc.dpb[i].pic_order_cnt = h265->decoded_curr_pic.pic_order_cnt;
+   context->desc.h265enc.dpb[i].is_ltr = h265->decoded_curr_pic.flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE;
+
    context->desc.h265enc.pic_order_cnt = h265->decoded_curr_pic.pic_order_cnt;
    coded_buf = handle_table_get(drv->htab, h265->coded_buf);
    if (!coded_buf)
@@ -107,6 +132,16 @@ vlVaHandleVAEncPictureParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext *cont
    return VA_STATUS_SUCCESS;
 }
 
+static uint8_t
+vlVaDpbIndex(vlVaContext *context, VASurfaceID id)
+{
+   for (uint8_t i = 0; i < context->desc.h265enc.dpb_size; i++) {
+      if (context->desc.h265enc.dpb[i].id == id)
+         return i;
+   }
+   return PIPE_H2645_LIST_REF_INVALID_ENTRY;
+}
+
 VAStatus
 vlVaHandleVAEncSliceParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
 {
@@ -116,6 +151,8 @@ vlVaHandleVAEncSliceParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext *contex
    h265 = buf->data;
    memset(&context->desc.h265enc.ref_idx_l0_list, VA_INVALID_ID, sizeof(context->desc.h265enc.ref_idx_l0_list));
    memset(&context->desc.h265enc.ref_idx_l1_list, VA_INVALID_ID, sizeof(context->desc.h265enc.ref_idx_l1_list));
+   memset(&context->desc.h265enc.ref_list0, PIPE_H2645_LIST_REF_INVALID_ENTRY, sizeof(context->desc.h265enc.ref_list0));
+   memset(&context->desc.h265enc.ref_list1, PIPE_H2645_LIST_REF_INVALID_ENTRY, sizeof(context->desc.h265enc.ref_list1));
 
    if (h265->slice_fields.bits.num_ref_idx_active_override_flag) {
       context->desc.h265enc.num_ref_idx_l0_active_minus1 = h265->num_ref_idx_l0_active_minus1;
@@ -124,10 +161,12 @@ vlVaHandleVAEncSliceParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext *contex
 
    for (int i = 0; i < 15; i++) {
       if (h265->ref_pic_list0[i].picture_id != VA_INVALID_ID) {
+         context->desc.h265enc.ref_list0[i] = vlVaDpbIndex(context, h265->ref_pic_list0[i].picture_id);
          context->desc.h265enc.ref_idx_l0_list[i] = PTR_TO_UINT(util_hash_table_get(context->desc.h265enc.frame_idx,
                                UINT_TO_PTR(h265->ref_pic_list0[i].picture_id + 1)));
       }
       if (h265->ref_pic_list1[i].picture_id != VA_INVALID_ID && h265->slice_type == PIPE_H265_SLICE_TYPE_B) {
+         context->desc.h265enc.ref_list1[i] = vlVaDpbIndex(context, h265->ref_pic_list1[i].picture_id);
          context->desc.h265enc.ref_idx_l1_list[i] = PTR_TO_UINT(util_hash_table_get(context->desc.h265enc.frame_idx,
                                UINT_TO_PTR(h265->ref_pic_list1[i].picture_id + 1)));
       }
@@ -182,7 +221,7 @@ vlVaHandleVAEncSequenceParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext *con
    uint32_t num_units_in_tick = 0, time_scale = 0;
 
    if (!context->decoder) {
-      context->templat.max_references = PIPE_H265_MAX_REFERENCES;
+      context->templat.max_references = PIPE_H265_MAX_NUM_LIST_REF;
       context->templat.level = h265->general_level_idc;
       context->decoder = drv->pipe->create_video_codec(drv->pipe, &context->templat);
 
diff --git a/src/gallium/frontends/va/surface.c b/src/gallium/frontends/va/surface.c
index 1abbecf3525..af3859d5b92 100644
--- a/src/gallium/frontends/va/surface.c
+++ b/src/gallium/frontends/va/surface.c
@@ -67,6 +67,34 @@ vlVaCreateSurfaces(VADriverContextP ctx, int width, int height, int format,
                               NULL, 0);
 }
 
+static void
+vlVaRemoveDpbSurface(vlVaSurface *surf, VASurfaceID id)
+{
+   assert(surf->ctx->templat.entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE);
+
+   switch (u_reduce_video_profile(surf->ctx->templat.profile)) {
+   case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+      for (unsigned i = 0; i < surf->ctx->desc.h264enc.dpb_size; i++) {
+         if (surf->ctx->desc.h264enc.dpb[i].id == id) {
+            memset(&surf->ctx->desc.h264enc.dpb[i], 0, sizeof(surf->ctx->desc.h264enc.dpb[i]));
+            break;
+         }
+      }
+      break;
+   case PIPE_VIDEO_FORMAT_HEVC:
+      for (unsigned i = 0; i < surf->ctx->desc.h265enc.dpb_size; i++) {
+         if (surf->ctx->desc.h265enc.dpb[i].id == id) {
+            memset(&surf->ctx->desc.h265enc.dpb[i], 0, sizeof(surf->ctx->desc.h265enc.dpb[i]));
+            break;
+         }
+      }
+      break;
+   default:
+      assert(false);
+      break;
+   }
+}
+
 VAStatus
 vlVaDestroySurfaces(VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces)
 {
@@ -91,6 +119,8 @@ vlVaDestroySurfaces(VADriverContextP ctx, VASurfaceID *surface_list, int num_sur
          _mesa_set_remove_key(surf->ctx->surfaces, surf);
          if (surf->fence && surf->ctx->decoder && surf->ctx->decoder->destroy_fence)
             surf->ctx->decoder->destroy_fence(surf->ctx->decoder, surf->fence);
+         if (surf->is_dpb)
+            vlVaRemoveDpbSurface(surf, surface_list[i]);
       }
       if (drv->last_efc_surface) {
          vlVaSurface *efc_surf = drv->last_efc_surface;
diff --git a/src/gallium/frontends/va/va_private.h b/src/gallium/frontends/va/va_private.h
index 8662f093003..2366e868658 100644
--- a/src/gallium/frontends/va/va_private.h
+++ b/src/gallium/frontends/va/va_private.h
@@ -446,6 +446,7 @@ typedef struct vlVaSurface {
    bool full_range;
    struct pipe_fence_handle *fence;
    struct vlVaSurface *efc_surface; /* input surface for EFC */
+   bool is_dpb;
 } vlVaSurface;
 
 typedef struct {
@@ -563,6 +564,7 @@ VAStatus vlVaMapBuffer2(VADriverContextP ctx, VABufferID buf_id, void **pbuf, ui
 VAStatus vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf);
 VAStatus vlVaHandleSurfaceAllocate(vlVaDriver *drv, vlVaSurface *surface, struct pipe_video_buffer *templat,
                                    const uint64_t *modifiers, unsigned int modifiers_count);
+void vlVaSetSurfaceContext(vlVaDriver *drv, vlVaSurface *surf, vlVaContext *context);
 void vlVaGetReferenceFrame(vlVaDriver *drv, VASurfaceID surface_id, struct pipe_video_buffer **ref_frame);
 void vlVaHandlePictureParameterBufferMPEG12(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf);
 void vlVaHandleIQMatrixBufferMPEG12(vlVaContext *context, vlVaBuffer *buf);
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 4d4d0e987db..b1942563502 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -40,8 +40,10 @@
 extern "C" {
 #endif
 
-#define PIPE_H264_MAX_REFERENCES 16
-#define PIPE_H265_MAX_REFERENCES 15
+#define PIPE_H264_MAX_NUM_LIST_REF 32
+#define PIPE_H264_MAX_DPB_SIZE 33
+#define PIPE_H265_MAX_NUM_LIST_REF 15
+#define PIPE_H265_MAX_DPB_SIZE 16
 #define PIPE_H265_MAX_SLICES 128
 #define PIPE_AV1_MAX_REFERENCES 8
 #define PIPE_DEFAULT_FRAME_RATE_DEN 1
@@ -50,6 +52,7 @@ extern "C" {
 #define PIPE_H2645_EXTENDED_SAR 255
 #define PIPE_ENC_ROI_REGION_NUM_MAX 32
 #define PIPE_DEFAULT_DECODER_FEEDBACK_TIMEOUT_NS 1000000000
+#define PIPE_H2645_LIST_REF_INVALID_ENTRY 0xff
 
 /*
  * see table 6-12 in the spec
@@ -634,6 +637,14 @@ struct pipe_h264_enc_seq_param
    uint32_t max_dec_frame_buffering;
 };
 
+struct pipe_h264_enc_dpb_entry
+{
+   uint32_t id;
+   uint32_t frame_idx;
+   uint32_t pic_order_cnt;
+   bool is_ltr;
+};
+
 struct pipe_h264_enc_picture_desc
 {
    struct pipe_picture_desc base;
@@ -663,10 +674,10 @@ struct pipe_h264_enc_picture_desc
    unsigned pic_order_cnt;
    unsigned num_ref_idx_l0_active_minus1;
    unsigned num_ref_idx_l1_active_minus1;
-   unsigned ref_idx_l0_list[32];
-   bool l0_is_long_term[32];
-   unsigned ref_idx_l1_list[32];
-   bool l1_is_long_term[32];
+   unsigned ref_idx_l0_list[PIPE_H264_MAX_NUM_LIST_REF];
+   bool l0_is_long_term[PIPE_H264_MAX_NUM_LIST_REF];
+   unsigned ref_idx_l1_list[PIPE_H264_MAX_NUM_LIST_REF];
+   bool l1_is_long_term[PIPE_H264_MAX_NUM_LIST_REF];
    unsigned gop_size;
    struct pipe_enc_quality_modes quality_modes;
    struct pipe_enc_intra_refresh intra_refresh;
@@ -699,6 +710,12 @@ struct pipe_h264_enc_picture_desc
       };
       uint32_t value;
    } header_flags;
+
+   struct pipe_h264_enc_dpb_entry dpb[PIPE_H264_MAX_DPB_SIZE];
+   uint8_t dpb_size;
+   uint8_t dpb_curr_pic; /* index in dpb */
+   uint8_t ref_list0[PIPE_H264_MAX_NUM_LIST_REF]; /* index in dpb, PIPE_H2645_LIST_REF_INVALID_ENTRY invalid */
+   uint8_t ref_list1[PIPE_H264_MAX_NUM_LIST_REF]; /* index in dpb, PIPE_H2645_LIST_REF_INVALID_ENTRY invalid */
 };
 
 struct pipe_h265_st_ref_pic_set
@@ -866,6 +883,13 @@ struct pipe_h265_enc_rate_control
    unsigned vbr_quality_factor;
 };
 
+struct pipe_h265_enc_dpb_entry
+{
+   uint32_t id;
+   uint32_t pic_order_cnt;
+   bool is_ltr;
+};
+
 struct pipe_h265_enc_picture_desc
 {
    struct pipe_picture_desc base;
@@ -886,8 +910,8 @@ struct pipe_h265_enc_picture_desc
    struct pipe_enc_roi roi;
    unsigned num_ref_idx_l0_active_minus1;
    unsigned num_ref_idx_l1_active_minus1;
-   unsigned ref_idx_l0_list[PIPE_H265_MAX_REFERENCES];
-   unsigned ref_idx_l1_list[PIPE_H265_MAX_REFERENCES];
+   unsigned ref_idx_l0_list[PIPE_H265_MAX_NUM_LIST_REF];
+   unsigned ref_idx_l1_list[PIPE_H265_MAX_NUM_LIST_REF];
    bool not_referenced;
    struct hash_table *frame_idx;
 
@@ -916,6 +940,12 @@ struct pipe_h265_enc_picture_desc
 
    struct pipe_enc_hdr_cll metadata_hdr_cll;
    struct pipe_enc_hdr_mdcv metadata_hdr_mdcv;
+
+   struct pipe_h265_enc_dpb_entry dpb[PIPE_H265_MAX_DPB_SIZE];
+   uint8_t dpb_size;
+   uint8_t dpb_curr_pic; /* index in dpb */
+   uint8_t ref_list0[PIPE_H265_MAX_NUM_LIST_REF]; /* index in dpb, PIPE_H2645_LIST_REF_INVALID_ENTRY invalid */
+   uint8_t ref_list1[PIPE_H265_MAX_NUM_LIST_REF]; /* index in dpb, PIPE_H2645_LIST_REF_INVALID_ENTRY invalid */
 };
 
 struct pipe_av1_enc_rate_control