diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c
index c5a3890072a..cac84cd90d4 100644
--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c
@@ -479,6 +479,15 @@ static void radeon_vcn_enc_h264_get_param(struct radeon_encoder *enc,
    enc->enc_pic.h264_enc_params.is_long_term = pic->is_ltr;
    enc->enc_pic.not_referenced = pic->not_referenced;
 
+   if (enc->dpb_type == DPB_TIER_2) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(pic->dpb); i++) {
+         struct pipe_video_buffer *buf = pic->dpb[i].buffer;
+         enc->enc_pic.dpb_bufs[i] =
+            buf ? vl_video_buffer_get_associated_data(buf, &enc->base) : NULL;
+         assert(!buf || enc->enc_pic.dpb_bufs[i]);
+      }
+   }
+
    radeon_vcn_enc_h264_get_cropping_param(enc, pic);
    radeon_vcn_enc_h264_get_dbk_param(enc, pic);
    radeon_vcn_enc_h264_get_rc_param(enc, pic);
@@ -673,6 +682,15 @@ static void radeon_vcn_enc_hevc_get_param(struct radeon_encoder *enc,
    enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
    enc->enc_pic.temporal_id = pic->pic.temporal_id;
 
+   if (enc->dpb_type == DPB_TIER_2) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(pic->dpb); i++) {
+         struct pipe_video_buffer *buf = pic->dpb[i].buffer;
+         enc->enc_pic.dpb_bufs[i] =
+            buf ? vl_video_buffer_get_associated_data(buf, &enc->base) : NULL;
+         assert(!buf || enc->enc_pic.dpb_bufs[i]);
+      }
+   }
+
    radeon_vcn_enc_hevc_get_cropping_param(enc, pic);
    radeon_vcn_enc_hevc_get_dbk_param(enc, pic);
    radeon_vcn_enc_hevc_get_rc_param(enc, pic);
@@ -882,6 +900,15 @@ static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc,
       }
    }
 
+   if (enc->dpb_type == DPB_TIER_2) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(pic->dpb); i++) {
+         struct pipe_video_buffer *buf = pic->dpb[i].buffer;
+         enc->enc_pic.dpb_bufs[i] =
+            buf ? vl_video_buffer_get_associated_data(buf, &enc->base) : NULL;
+         assert(!buf || enc->enc_pic.dpb_bufs[i]);
+      }
+   }
+
    radeon_vcn_enc_av1_get_spec_misc_param(enc, pic);
    radeon_vcn_enc_av1_get_rc_param(enc, pic);
    radeon_vcn_enc_av1_get_tile_config(enc, pic);
@@ -1087,6 +1114,9 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict
    enc_pic->ctx_buf.rec_luma_pitch = pitch;
    enc_pic->ctx_buf.pre_encode_picture_luma_pitch = pitch;
    enc_pic->ctx_buf.num_reconstructed_pictures = num_reconstructed_pictures;
+   enc_pic->dpb_luma_size = luma_size;
+   enc_pic->dpb_chroma_size = chroma_size;
+   enc_pic->total_coloc_bytes = total_coloc_bytes;
 
    offset = 0;
    enc->metadata_size = 0;
@@ -1200,7 +1230,7 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict
 
    enc->dpb_slots = num_reconstructed_pictures;
 
-   return offset;
+   return enc->dpb_size;
 }
 
 /* each block (MB/CTB/SB) has one QP/QI value */
@@ -1340,14 +1370,18 @@ static void radeon_enc_begin_frame(struct pipe_video_codec *encoder,
       }
    }
 
+   if (enc->dpb_type == DPB_TIER_2)
+      dpb_slots = 0;
+
    radeon_vcn_enc_get_param(enc, picture);
    if (!enc->dpb) {
       enc->dpb = CALLOC_STRUCT(rvid_buffer);
-      setup_dpb(enc, dpb_slots);
-      if (!enc->dpb ||
-          !si_vid_create_buffer(enc->screen, enc->dpb, enc->dpb_size, PIPE_USAGE_DEFAULT)) {
-         RVID_ERR("Can't create DPB buffer.\n");
-         goto error;
+      if (setup_dpb(enc, dpb_slots)) {
+         if (!enc->dpb ||
+             !si_vid_create_buffer(enc->screen, enc->dpb, enc->dpb_size, PIPE_USAGE_DEFAULT)) {
+            RVID_ERR("Can't create DPB buffer.\n");
+            goto error;
+         }
       }
    }
 
@@ -1685,6 +1719,105 @@ static void radeon_enc_destroy_fence(struct pipe_video_codec *encoder,
    enc->ws->fence_reference(enc->ws, &fence, NULL);
 }
 
+/* Compute the aligned frame context buffer size and record the codec-specific offsets in it. */
+static unsigned int radeon_enc_frame_context_buffer_size(struct radeon_encoder *enc)
+{
+   unsigned int size = 0;
+   bool is_h264 = u_reduce_video_profile(enc->base.profile)
+                           == PIPE_VIDEO_FORMAT_MPEG4_AVC;
+   bool is_av1 = u_reduce_video_profile(enc->base.profile)
+                           == PIPE_VIDEO_FORMAT_AV1;
+   bool has_b = enc->enc_pic.spec_misc.b_picture_enabled; /* for h264 only */
+
+   size = RENCODE_MAX_METADATA_BUFFER_SIZE_PER_FRAME;
+   if (is_h264) {
+      if (has_b) {
+         enc->enc_pic.fcb_offset.h264.colloc_buffer_offset = size;
+         size += enc->enc_pic.total_coloc_bytes;
+      } else
+         enc->enc_pic.fcb_offset.h264.colloc_buffer_offset =
+                                      RENCODE_INVALID_COLOC_OFFSET;
+   }
+
+   if (is_av1) {
+      enc->enc_pic.fcb_offset.av1.av1_cdf_frame_context_offset = size;
+      size += RENCODE_AV1_FRAME_CONTEXT_CDF_TABLE_SIZE;
+      enc->enc_pic.fcb_offset.av1.av1_cdef_algorithm_context_offset = size;
+      size += RENCODE_AV1_CDEF_ALGORITHM_FRAME_CONTEXT_SIZE;
+   }
+
+   size = align(size, enc->alignment);
+   return size;
+}
+
+/* Allocate the fcb (and, when pre-encode is enabled, pre and pre_fcb) of a DPB entry once. */
+void radeon_enc_create_dpb_aux_buffers(struct radeon_encoder *enc, struct radeon_enc_dpb_buffer *buf)
+{
+   if (buf->fcb)
+      return;
+
+   uint32_t fcb_size = radeon_enc_frame_context_buffer_size(enc);
+   uint32_t recon_size = enc->enc_pic.dpb_luma_size + enc->enc_pic.dpb_chroma_size;
+
+   buf->fcb = CALLOC_STRUCT(rvid_buffer);
+   if (!buf->fcb || !si_vid_create_buffer(enc->screen, buf->fcb, fcb_size, PIPE_USAGE_DEFAULT)) {
+      RVID_ERR("Can't create fcb buffer!\n");
+      return;
+   }
+
+   if (enc->enc_pic.quality_modes.pre_encode_mode) {
+      buf->pre = CALLOC_STRUCT(rvid_buffer);
+      if (!buf->pre || !si_vid_create_buffer(enc->screen, buf->pre, recon_size, PIPE_USAGE_DEFAULT)) {
+         RVID_ERR("Can't create preenc buffer!\n");
+         return;
+      }
+
+      buf->pre_fcb = CALLOC_STRUCT(rvid_buffer);
+      if (!buf->pre_fcb || !si_vid_create_buffer(enc->screen, buf->pre_fcb, fcb_size, PIPE_USAGE_DEFAULT)) {
+         RVID_ERR("Can't create preenc fcb buffer!\n");
+         return;
+      }
+   }
+}
+
+/* Callback handed to vl_video_buffer_set_associated_data: frees aux buffers and the entry. */
+static void radeon_enc_destroy_dpb_buffer(void *data)
+{
+   struct radeon_enc_dpb_buffer *dpb = data;
+
+   RADEON_ENC_DESTROY_VIDEO_BUFFER(dpb->fcb);
+   RADEON_ENC_DESTROY_VIDEO_BUFFER(dpb->pre);
+   RADEON_ENC_DESTROY_VIDEO_BUFFER(dpb->pre_fcb);
+   FREE(dpb);
+}
+
+/* Create a video buffer and attach a radeon_enc_dpb_buffer pointing at its first two resources. */
+static struct pipe_video_buffer *radeon_enc_create_dpb_buffer(struct pipe_video_codec *encoder,
+                                                              struct pipe_picture_desc *picture,
+                                                              const struct pipe_video_buffer *templat)
+{
+   struct radeon_encoder *enc = (struct radeon_encoder *)encoder;
+
+   struct pipe_video_buffer *buf = enc->base.context->create_video_buffer(enc->base.context, templat);
+   if (!buf) {
+      RVID_ERR("Can't create dpb buffer!\n");
+      return NULL;
+   }
+
+   struct radeon_enc_dpb_buffer *dpb = CALLOC_STRUCT(radeon_enc_dpb_buffer);
+   if (!dpb) {
+      RVID_ERR("Can't allocate dpb buffer data!\n");
+      buf->destroy(buf);
+      return NULL;
+   }
+   dpb->luma = (struct si_texture *)((struct vl_video_buffer *)buf)->resources[0];
+   dpb->chroma = (struct si_texture *)((struct vl_video_buffer *)buf)->resources[1];
+
+   vl_video_buffer_set_associated_data(buf, &enc->base, dpb, &radeon_enc_destroy_dpb_buffer);
+
+   return buf;
+}
+
 struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
                                                const struct pipe_video_codec *templ,
                                                struct radeon_winsys *ws,
@@ -1732,6 +1865,12 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
 
    ac_vcn_enc_init_cmds(&enc->cmd, sscreen->info.vcn_ip_version);
 
+   if (sscreen->info.vcn_ip_version >= VCN_5_0_0)
+      enc->dpb_type = DPB_TIER_2;
+
+   if (enc->dpb_type == DPB_TIER_2)
+      enc->base.create_dpb_buffer = radeon_enc_create_dpb_buffer;
+
    if (sscreen->info.vcn_ip_version >= VCN_5_0_0) {
       radeon_enc_5_0_init(enc);
       if (sscreen->info.vcn_ip_version == VCN_5_0_0) {
diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h
index 446f8dca630..c0f4c6d4a6d 100644
--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h
@@ -57,6 +57,14 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
                                                struct radeon_winsys *ws,
                                                radeon_enc_get_buffer get_buffer);
 
+struct radeon_enc_dpb_buffer {
+   struct si_texture *luma;      /* recon luma */
+   struct si_texture *chroma;    /* recon chroma */
+   struct rvid_buffer *fcb;      /* frame context buffer*/
+   struct rvid_buffer *pre;      /* preenc recon */
+   struct rvid_buffer *pre_fcb;  /* preenc frame context buffer */
+};
+
 struct radeon_enc_pic {
    union {
       enum pipe_h2645_enc_picture_type picture_type;
@@ -90,12 +98,31 @@ struct radeon_enc_pic {
    unsigned nal_unit_type;
    unsigned temporal_id;
    unsigned num_temporal_layers;
+   unsigned dpb_luma_size;
+   unsigned dpb_chroma_size;
+   unsigned total_coloc_bytes;
    rvcn_enc_quality_modes_t quality_modes;
 
    bool not_referenced;
    bool use_rc_per_pic_ex;
    bool av1_tile_splitting_legacy_flag;
 
+   struct {
+      union {
+         struct
+         {
+            uint32_t av1_cdf_frame_context_offset;
+            uint32_t av1_cdef_algorithm_context_offset;
+         } av1;
+         struct
+         {
+            uint32_t colloc_buffer_offset;
+         } h264;
+      };
+   } fcb_offset;
+
+   struct radeon_enc_dpb_buffer *dpb_bufs[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES];
+
    struct {
       struct {
          struct {
@@ -242,6 +269,11 @@ struct radeon_encoder {
    unsigned roi_size;
    unsigned metadata_size;
 
+   enum {
+      DPB_LEGACY = 0,
+      DPB_TIER_2
+   } dpb_type;
+
    struct pipe_context *ectx;
 };
 
@@ -368,4 +400,7 @@ void radeon_enc_av1_tile_layout (uint32_t nb_sb, uint32_t nb_tiles, uint32_t min
 
 bool radeon_enc_av1_skip_mode_allowed(struct radeon_encoder *enc);
 
+void radeon_enc_create_dpb_aux_buffers(struct radeon_encoder *enc,
+                                       struct radeon_enc_dpb_buffer *buf);
+
 #endif // _RADEON_VCN_ENC_H
diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c
index 9c531e73659..bd622e2d426 100644
--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c
@@ -279,6 +279,103 @@ static void radeon_enc_ctx(struct radeon_encoder *enc)
    RADEON_ENC_END();
 }
 
+/* Emit the ctx command from the per-picture dpb_bufs entries; empty slots are zero-filled. */
+static void radeon_enc_ctx_tier2(struct radeon_encoder *enc)
+{
+   uint32_t num_refs = 0;
+   uint32_t swizzle_mode = radeon_enc_ref_swizzle_mode(enc);
+   bool is_h264 = u_reduce_video_profile(enc->base.profile)
+                           == PIPE_VIDEO_FORMAT_MPEG4_AVC;
+   bool is_av1 = u_reduce_video_profile(enc->base.profile)
+                           == PIPE_VIDEO_FORMAT_AV1;
+
+   for (uint32_t i = 0; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) {
+      if (enc->enc_pic.dpb_bufs[i]) {
+         radeon_enc_create_dpb_aux_buffers(enc, enc->enc_pic.dpb_bufs[i]);
+         num_refs = i + 1;
+      }
+   }
+
+   RADEON_ENC_BEGIN(enc->cmd.ctx);
+   if (enc->dpb->res) {
+      RADEON_ENC_READWRITE(enc->dpb->res->buf, enc->dpb->res->domains, 0);
+   } else {
+      RADEON_ENC_CS(0);
+      RADEON_ENC_CS(0);
+   }
+   RADEON_ENC_CS(num_refs);
+
+   for (uint32_t i = 0; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) {
+      if (!enc->enc_pic.dpb_bufs[i]) {
+         for (int j = 0; j < 15; j++)
+            RADEON_ENC_CS(0);
+         continue;
+      }
+      struct si_texture *luma = enc->enc_pic.dpb_bufs[i]->luma;
+      struct si_texture *chroma = enc->enc_pic.dpb_bufs[i]->chroma;
+      struct rvid_buffer *fcb = enc->enc_pic.dpb_bufs[i]->fcb;
+      RADEON_ENC_READWRITE(luma->buffer.buf, luma->buffer.domains, luma->surface.u.gfx9.surf_offset);
+      RADEON_ENC_CS(luma->surface.u.gfx9.surf_pitch);
+      RADEON_ENC_READWRITE(chroma->buffer.buf, chroma->buffer.domains, chroma->surface.u.gfx9.surf_offset);
+      RADEON_ENC_CS(chroma->surface.u.gfx9.surf_pitch);
+      RADEON_ENC_CS(0);
+      RADEON_ENC_CS(0);
+      RADEON_ENC_CS(0);
+      RADEON_ENC_CS(swizzle_mode);
+      RADEON_ENC_READWRITE(fcb->res->buf, fcb->res->domains, 0);
+      if (is_h264) {
+         RADEON_ENC_CS(enc->enc_pic.fcb_offset.h264.colloc_buffer_offset);
+         RADEON_ENC_CS(0);
+      } else if (is_av1) {
+         RADEON_ENC_CS(enc->enc_pic.fcb_offset.av1.av1_cdf_frame_context_offset);
+         RADEON_ENC_CS(enc->enc_pic.fcb_offset.av1.av1_cdef_algorithm_context_offset);
+      } else {
+         RADEON_ENC_CS(0);
+         RADEON_ENC_CS(0);
+      }
+      RADEON_ENC_CS(0);
+   }
+
+   /* pre-encoding: each entry uses its own pre (recon) and pre_fcb (frame context) buffers */
+   for (uint32_t i = 0; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) {
+      if (!enc->enc_pic.quality_modes.pre_encode_mode || !enc->enc_pic.dpb_bufs[i]) {
+         for (int j = 0; j < 15; j++)
+            RADEON_ENC_CS(0);
+         continue;
+      }
+      struct rvid_buffer *pre = enc->enc_pic.dpb_bufs[i]->pre;
+      struct rvid_buffer *pre_fcb = enc->enc_pic.dpb_bufs[i]->pre_fcb;
+      RADEON_ENC_READWRITE(pre->res->buf, pre->res->domains, 0);
+      RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
+      RADEON_ENC_READWRITE(pre->res->buf, pre->res->domains, enc->enc_pic.dpb_luma_size);
+      RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
+      RADEON_ENC_CS(0);
+      RADEON_ENC_CS(0);
+      RADEON_ENC_CS(0);
+      RADEON_ENC_CS(swizzle_mode);
+      RADEON_ENC_READWRITE(pre_fcb->res->buf, pre_fcb->res->domains, 0);
+      if (is_h264) {
+         RADEON_ENC_CS(enc->enc_pic.fcb_offset.h264.colloc_buffer_offset);
+         RADEON_ENC_CS(0);
+      } else if (is_av1) {
+         RADEON_ENC_CS(enc->enc_pic.fcb_offset.av1.av1_cdf_frame_context_offset);
+         RADEON_ENC_CS(enc->enc_pic.fcb_offset.av1.av1_cdef_algorithm_context_offset);
+      } else {
+         RADEON_ENC_CS(0);
+         RADEON_ENC_CS(0);
+      }
+      RADEON_ENC_CS(0);
+   }
+
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.pre_encode_picture_luma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.pre_encode_picture_chroma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.pre_encode_input_picture.rgb.red_offset);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.pre_encode_input_picture.rgb.green_offset);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.pre_encode_input_picture.rgb.blue_offset);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.av1.av1_sdb_intermediate_context_offset);
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_ctx_override(struct radeon_encoder *enc)
 {
    RADEON_ENC_BEGIN(enc->cmd.ctx_override);
@@ -301,6 +398,9 @@ static void radeon_enc_ctx_override(struct radeon_encoder *enc)
 
 static void radeon_enc_metadata(struct radeon_encoder *enc)
 {
+   if (!enc->meta)
+      return;
+
    enc->enc_pic.metadata.two_pass_search_center_map_offset =
       enc->enc_pic.ctx_buf.two_pass_search_center_map_offset;
    RADEON_ENC_BEGIN(enc->cmd.metadata);
@@ -925,13 +1025,19 @@ void radeon_enc_5_0_init(struct radeon_encoder *enc)
    radeon_enc_4_0_init(enc);
 
    enc->session_init = radeon_enc_session_init;
-   enc->ctx = radeon_enc_ctx;
    enc->output_format = radeon_enc_output_format;
    enc->metadata = radeon_enc_metadata;
-   enc->ctx_override = radeon_enc_ctx_override;
    enc->encode_params = radeon_enc_encode_params;
    enc->rc_per_pic = radeon_enc_rc_per_pic;
 
+   if (enc->dpb_type == DPB_LEGACY) {
+      enc->ctx = radeon_enc_ctx;
+      enc->ctx_override = radeon_enc_ctx_override;
+   } else if (enc->dpb_type == DPB_TIER_2) {
+      enc->ctx = radeon_enc_ctx_tier2;
+      enc->ctx_override = radeon_enc_dummy;
+   }
+
    if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
       enc->spec_misc = radeon_enc_spec_misc;
       enc->encode_params_codec_spec = radeon_enc_encode_params_h264;