From 73657d4246a30c1023e81fae69892659406a852e Mon Sep 17 00:00:00 2001 From: David Rosca Date: Wed, 1 Jan 2025 13:58:13 +0100 Subject: [PATCH] radeonsi/uvd_enc: Support raw packed headers Reviewed-by: Ruijing Dong Acked-by: Leo Liu Part-of: --- src/gallium/drivers/radeonsi/radeon_uvd_enc.c | 117 ++++++++++++++++-- src/gallium/drivers/radeonsi/radeon_uvd_enc.h | 18 ++- .../drivers/radeonsi/radeon_uvd_enc_1_1.c | 45 ++----- 3 files changed, 134 insertions(+), 46 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeon_uvd_enc.c b/src/gallium/drivers/radeonsi/radeon_uvd_enc.c index 41de0d43197..816c320fdb4 100644 --- a/src/gallium/drivers/radeonsi/radeon_uvd_enc.c +++ b/src/gallium/drivers/radeonsi/radeon_uvd_enc.c @@ -23,8 +23,6 @@ static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, enc->enc_pic.desc = pic; enc->enc_pic.picture_type = pic->picture_type; enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type; - enc->enc_pic.is_iframe = (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR) || - (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_I); enc->enc_pic.enc_params.reference_picture_index = pic->ref_list0[0] == PIPE_H2645_LIST_REF_INVALID_ENTRY ? 
0xffffffff : pic->ref_list0[0]; enc->enc_pic.enc_params.reconstructed_picture_index = pic->dpb_curr_pic; @@ -187,6 +185,79 @@ static void radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder, } } +static void *radeon_uvd_enc_encode_headers(struct radeon_uvd_encoder *enc) +{ /* Writes every non-slice entry of enc->enc_pic.desc->raw_headers into the mapped bitstream buffer and returns a ruvd_enc_feedback_data (allocated with CALLOC_VARIANT_LENGTH_STRUCT) listing the resulting segments. Returns NULL when there are no raw headers, no slice entries, only slice entries, or when the allocation or the buffer mapping fails. On success enc->bs_offset is set to the 16-aligned end of the written header bytes. The caller in encode_bitstream stores the result in fb->user_data and get_feedback releases it with FREE. */ + unsigned num_slices = 0, num_headers = 0; + /* First pass: count all raw headers and how many of them are slice headers. */ + util_dynarray_foreach(&enc->enc_pic.desc->raw_headers, struct pipe_enc_raw_header, header) { + if (header->is_slice) + num_slices++; + num_headers++; + } + /* Bail out when the list is empty, has no slice, or consists of slices only. */ + if (!num_headers || !num_slices || num_headers == num_slices) + return NULL; + /* One segment per non-slice header plus a single segment shared by all slice headers. */ + size_t segments_size = + sizeof(struct ruvd_enc_output_unit_segment) * (num_headers - num_slices + 1); + struct ruvd_enc_feedback_data *data = + CALLOC_VARIANT_LENGTH_STRUCT(ruvd_enc_feedback_data, segments_size); + if (!data) + return NULL; + + uint8_t *ptr = enc->ws->buffer_map(enc->ws, enc->bs_handle, &enc->cs, + PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); + if (!ptr) { + RVID_ERR("Can't map bs buffer.\n"); + FREE(data); + return NULL; + } + /* offset is the write position inside the mapped buffer; slice_segment remembers the one segment that stands for the slice data. */ + unsigned offset = 0; + struct ruvd_enc_output_unit_segment *slice_segment = NULL; + /* Second pass: emit headers in list order and record one segment per emitted unit. */ + util_dynarray_foreach(&enc->enc_pic.desc->raw_headers, struct pipe_enc_raw_header, header) { + if (header->is_slice) { + if (slice_segment) + continue; /* only the first slice header gets a segment; later ones are skipped */ + slice_segment = &data->segments[data->num_segments]; + slice_segment->is_slice = true; + } else { + unsigned size; + switch (header->type) { + case PIPE_H265_NAL_VPS: + size = radeon_uvd_enc_write_vps(enc, ptr + offset); + break; + case PIPE_H265_NAL_SPS: + size = radeon_uvd_enc_write_sps(enc, ptr + offset); + break; + case PIPE_H265_NAL_PPS: + size = radeon_uvd_enc_write_pps(enc, ptr + offset); + break; + default: /* Any other header type is copied verbatim from the buffer supplied with the raw header. NOTE(review): nothing checks offset plus header->size against enc->bs_size before this copy, and the NULL test on header->buffer is an assert only; the sole bound check is the assert after the loop. Confirm callers guarantee that the headers fit. */ + assert(header->buffer); + memcpy(ptr + offset, header->buffer, header->size); + size = header->size; + break; + } + data->segments[data->num_segments].size = size; + data->segments[data->num_segments].offset = offset; + offset += size; + } + data->num_segments++; + } + /* The slice data is placed after the headers, at the next multiple of 16 bytes. */ + enc->bs_offset = align(offset, 16); +
assert(enc->bs_offset < enc->bs_size); + /* The early return above guarantees at least one slice header, so slice_segment was set in the loop. */ + assert(slice_segment); + slice_segment->offset = enc->bs_offset; + + enc->ws->buffer_unmap(enc->ws, enc->bs_handle); + + return data; +} + static void radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, struct pipe_resource *destination, void **fb) @@ -194,6 +265,7 @@ static void radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder, struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; enc->get_buffer(destination, &enc->bs_handle, NULL); enc->bs_size = destination->width0; + enc->bs_offset = 0; /* reset per frame; radeon_uvd_enc_encode_headers raises it only when it wrote headers */ *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); @@ -202,6 +274,8 @@ static void radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder, return; } + enc->fb->user_data = radeon_uvd_enc_encode_headers(enc); + enc->need_feedback = true; enc->encode(enc); } @@ -244,15 +318,38 @@ static void radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder, void * struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder; struct rvid_buffer *fb = feedback; - if (NULL != size) { - radeon_uvd_enc_feedback_t *fb_data = (radeon_uvd_enc_feedback_t *)enc->ws->buffer_map( - enc->ws, fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); + radeon_uvd_enc_feedback_t *fb_data = (radeon_uvd_enc_feedback_t *)enc->ws->buffer_map( + enc->ws, fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY); - if (!fb_data->status) - *size = fb_data->bitstream_size; - else - *size = 0; - enc->ws->buffer_unmap(enc->ws, fb->res->buf); + if (!fb_data->status) + *size = fb_data->bitstream_size; + else + *size = 0; + /* NOTE(review): this change drops the former NULL test on size, so size is now written unconditionally, and fb_data is read without a NULL check on the buffer_map result. Confirm every caller passes a valid size pointer. */ + enc->ws->buffer_unmap(enc->ws, fb->res->buf); + + metadata->present_metadata = PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION; + + if (fb->user_data) { + struct ruvd_enc_feedback_data *data = fb->user_data; + metadata->codec_unit_metadata_count = data->num_segments; + for (unsigned i = 0; i < data->num_segments; i++) { +
metadata->codec_unit_metadata[i].offset = data->segments[i].offset; + if (data->segments[i].is_slice) { + metadata->codec_unit_metadata[i].size = *size; + metadata->codec_unit_metadata[i].flags = 0; + } else { + metadata->codec_unit_metadata[i].size = data->segments[i].size; + metadata->codec_unit_metadata[i].flags = PIPE_VIDEO_CODEC_UNIT_LOCATION_FLAG_SINGLE_NALU; + } + } + FREE(fb->user_data); + fb->user_data = NULL; + } else { + metadata->codec_unit_metadata_count = 1; + metadata->codec_unit_metadata[0].offset = 0; + metadata->codec_unit_metadata[0].size = *size; + metadata->codec_unit_metadata[0].flags = 0; } si_vid_destroy_buffer(fb); diff --git a/src/gallium/drivers/radeonsi/radeon_uvd_enc.h b/src/gallium/drivers/radeonsi/radeon_uvd_enc.h index 821e25eb036..a39adbda652 100644 --- a/src/gallium/drivers/radeonsi/radeon_uvd_enc.h +++ b/src/gallium/drivers/radeonsi/radeon_uvd_enc.h @@ -317,8 +317,6 @@ struct radeon_uvd_enc_pic { unsigned nal_unit_type; unsigned temporal_id; - bool is_iframe; - ruvd_enc_task_info_t task_info; ruvd_enc_session_init_t session_init; ruvd_enc_layer_control_t layer_ctrl; @@ -359,6 +357,7 @@ struct radeon_uvd_encoder { struct pb_buffer_lean *bs_handle; unsigned bs_size; + unsigned bs_offset; /* byte offset into the bitstream buffer that is programmed as video_bitstream_data_offset; zeroed in encode_bitstream and set to the 16-aligned end of the headers by radeon_uvd_enc_encode_headers */ unsigned dpb_slots; @@ -375,9 +374,24 @@ struct radeon_uvd_encoder { bool need_rc_per_pic; }; +struct ruvd_enc_output_unit_segment { /* one output unit recorded by radeon_uvd_enc_encode_headers and reported through codec_unit_metadata in get_feedback */ + bool is_slice; /* true for the single entry that stands for the encoded slice data */ + unsigned size; /* bytes written for a header entry; not assigned for the slice entry, whose size get_feedback takes from the reported bitstream size */ + unsigned offset; /* byte offset of the unit inside the bitstream buffer */ +}; + /* Per-frame list of segments; stored in fb->user_data and released with FREE in get_feedback. */ +struct ruvd_enc_feedback_data { + unsigned num_segments; /* number of valid entries in segments */ + struct ruvd_enc_output_unit_segment segments[]; /* flexible array sized by the allocation in radeon_uvd_enc_encode_headers */ +}; + struct si_screen; void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc); bool si_radeon_uvd_enc_supported(struct si_screen *sscreen); +unsigned int radeon_uvd_enc_write_vps(struct radeon_uvd_encoder *enc, uint8_t *out); +unsigned int radeon_uvd_enc_write_sps(struct radeon_uvd_encoder *enc, uint8_t *out); +unsigned int radeon_uvd_enc_write_pps(struct radeon_uvd_encoder *enc, uint8_t *out); /* each writer hands out to radeon_bs_reset and returns bs.bits_output / 8, a byte count; none of them takes a capacity for out */ + #endif // _RADEON_UVD_ENC_H diff --git
a/src/gallium/drivers/radeonsi/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeonsi/radeon_uvd_enc_1_1.c index db810ba92b3..1a03b5761d6 100644 --- a/src/gallium/drivers/radeonsi/radeon_uvd_enc_1_1.c +++ b/src/gallium/drivers/radeonsi/radeon_uvd_enc_1_1.c @@ -277,17 +277,13 @@ static void radeon_uvd_enc_quality_params(struct radeon_uvd_encoder *enc) RADEON_ENC_END(); } -static void radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) +unsigned int radeon_uvd_enc_write_sps(struct radeon_uvd_encoder *enc, uint8_t *out) { struct radeon_bitstream bs; struct pipe_h265_enc_seq_param *sps = &enc->enc_pic.desc->seq; int i; - RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); - RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS); - uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; - - radeon_bs_reset(&bs, NULL, &enc->cs); + radeon_bs_reset(&bs, out, NULL); radeon_bs_set_emulation_prevention(&bs, false); radeon_bs_code_fixed_bits(&bs, 0x00000001, 32); radeon_bs_code_fixed_bits(&bs, 0x4201, 16); @@ -412,21 +408,15 @@ static void radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc) radeon_bs_code_fixed_bits(&bs, 0x1, 1); radeon_bs_byte_align(&bs); - radeon_bs_flush_headers(&bs); - *size_in_bytes = bs.bits_output / 8; - RADEON_ENC_END(); + return bs.bits_output / 8; } -static void radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) +unsigned int radeon_uvd_enc_write_pps(struct radeon_uvd_encoder *enc, uint8_t *out) { struct radeon_bitstream bs; struct pipe_h265_enc_pic_param *pps = &enc->enc_pic.desc->pic; - RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); - RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS); - uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; - - radeon_bs_reset(&bs, NULL, &enc->cs); + radeon_bs_reset(&bs, out, NULL); radeon_bs_set_emulation_prevention(&bs, false); radeon_bs_code_fixed_bits(&bs, 0x00000001, 32); radeon_bs_code_fixed_bits(&bs, 0x4401, 16); @@ -473,22 +463,16 @@ static void 
radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) radeon_bs_code_fixed_bits(&bs, 0x1, 1); radeon_bs_byte_align(&bs); - radeon_bs_flush_headers(&bs); - *size_in_bytes = bs.bits_output / 8; - RADEON_ENC_END(); + return bs.bits_output / 8; } -static void radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc) +unsigned int radeon_uvd_enc_write_vps(struct radeon_uvd_encoder *enc, uint8_t *out) { struct radeon_bitstream bs; struct pipe_h265_enc_vid_param *vps = &enc->enc_pic.desc->vid; int i; - RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER); - RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS); - uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; - - radeon_bs_reset(&bs, NULL, &enc->cs); + radeon_bs_reset(&bs, out, NULL); radeon_bs_set_emulation_prevention(&bs, false); radeon_bs_code_fixed_bits(&bs, 0x00000001, 32); radeon_bs_code_fixed_bits(&bs, 0x4001, 16); @@ -524,9 +508,7 @@ static void radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc) radeon_bs_code_fixed_bits(&bs, 0x1, 1); radeon_bs_byte_align(&bs); - radeon_bs_flush_headers(&bs); - *size_in_bytes = bs.bits_output / 8; - RADEON_ENC_END(); + return bs.bits_output / 8; } static void radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) @@ -715,7 +697,7 @@ static void radeon_uvd_enc_bitstream(struct radeon_uvd_encoder *enc) { enc->enc_pic.bit_buf.mode = RENC_UVD_SWIZZLE_MODE_LINEAR; enc->enc_pic.bit_buf.video_bitstream_buffer_size = enc->bs_size; - enc->enc_pic.bit_buf.video_bitstream_data_offset = 0; + enc->enc_pic.bit_buf.video_bitstream_data_offset = enc->bs_offset; RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_VIDEO_BITSTREAM_BUFFER); RADEON_ENC_CS(enc->enc_pic.bit_buf.mode); @@ -800,7 +782,7 @@ static void radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc) enc->enc_pic.enc_params.pic_type = RENC_UVD_PICTURE_TYPE_I; } - enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size; + enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size - 
enc->bs_offset; if (sscreen->info.gfx_level < GFX9) { enc->enc_pic.enc_params.input_pic_luma_pitch = (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); @@ -933,11 +915,6 @@ static void encode(struct radeon_uvd_encoder *enc) enc->enc_pic.layer_sel.temporal_layer_index = enc->enc_pic.temporal_id; radeon_uvd_enc_layer_select(enc); - if (enc->enc_pic.is_iframe) { - radeon_uvd_enc_nalu_vps_hevc(enc); - radeon_uvd_enc_nalu_pps_hevc(enc); - radeon_uvd_enc_nalu_sps_hevc(enc); - } radeon_uvd_enc_slice_header_hevc(enc); radeon_uvd_enc_encode_params_hevc(enc);