diff --git a/src/gallium/drivers/radeon/radeon_temporal.h b/src/gallium/drivers/radeon/radeon_temporal.h
new file mode 100644
index 00000000000..5a294d61066
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_temporal.h
@@ -0,0 +1,233 @@
+/**************************************************************************
+ *
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_TEMPORAL_H
+#define _RADEON_TEMPORAL_H
+
+#include "radeon_video.h"
+
+#define RENCODE_MAX_NUM_TEMPORAL_LAYERS 4
+#define RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE 9
+
+typedef struct rvcn_temporal_layer_pattern_entry_s
+{
+   unsigned temporal_id;
+   unsigned reference_index_in_table;
+   bool reference_modification;
+   unsigned frame_num_offset;
+   unsigned poc_offset;
+   bool mark_as_reference;
+} rvcn_temporal_layer_pattern_entry_t;
+
+typedef struct rvcn_temporal_layer_pattern_table_s
+{
+   unsigned pattern_size;
+   rvcn_temporal_layer_pattern_entry_t pattern_table[RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE];
+} rvcn_temporal_layer_pattern_table_t;
+
+/* One pattern table per supported layer count, indexed by (number of temporal layers - 1). */
+static const rvcn_temporal_layer_pattern_table_t rvcn_temporal_layer_pattern_tables[RENCODE_MAX_NUM_TEMPORAL_LAYERS] =
+{
+   /* 1 temporal layer */
+   {
+      2, /* temporal layer pattern size */
+      {
+         {
+            0,
+            0,
+            false,
+            0,
+            0,
+            true,
+         },
+         {
+            0,
+            0,
+            false,
+            1,
+            2,
+            true,
+         }
+      }
+   },
+   /* 2 temporal layers */
+   {
+      3, /* temporal layer pattern size */
+      {
+         {
+            0,
+            0,
+            false,
+            0,
+            0,
+            true,
+         },
+         {
+            1,
+            0,
+            false,
+            1,
+            2,
+            false,
+         },
+         {
+            0,
+            0,
+            false,
+            1,
+            4,
+            true,
+         }
+      }
+   },
+   /* 3 temporal layers */
+   {
+      5, /* temporal layer pattern size */
+      {
+         {
+            0,
+            0,
+            false,
+            0,
+            0,
+            true,
+         },
+         {
+            2,
+            0,
+            false,
+            1,
+            2,
+            false,
+         },
+         {
+            1,
+            0,
+            false,
+            1,
+            4,
+            true,
+         },
+         {
+            2,
+            2,
+            false,
+            2,
+            6,
+            false,
+         },
+         {
+            0,
+            0,
+            true,
+            2,
+            8,
+            true,
+         }
+      }
+   },
+   /* 4 temporal layers */
+   {
+      9, /* temporal layer pattern size */
+      {
+         {
+            0,
+            0,
+            false,
+            0,
+            0,
+            true,
+         },
+         {
+            3,
+            0,
+            false,
+            1,
+            2,
+            false,
+         },
+         {
+            2,
+            0,
+            false,
+            1,
+            4,
+            true,
+         },
+         {
+            3,
+            2,
+            false,
+            2,
+            6,
+            false,
+         },
+         {
+            1,
+            0,
+            true,
+            2,
+            8,
+            true,
+         },
+         {
+            3,
+            4,
+            false,
+            3,
+            10,
+            false,
+         },
+         {
+            2,
+            4,
+            false,
+            3,
+            12,
+            true,
+         },
+         {
+            3,
+            6,
+            false,
+            4,
+            14,
+            false,
+         },
+         {
+            0,
+            0,
+            true,
+            4,
+            16,
+            true,
+         }
+      }
+   }
+};
+
+#endif // _RADEON_TEMPORAL_H
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index dad9a1b8a43..1ab69ab1998 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -61,6 +61,9 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic
          enc->enc_pic.crop_top = 0;
          enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2;
       }
+      /* Frontends that leave num_temporal_layers at 0 get a single layer. */
+      enc->enc_pic.num_temporal_layers = pic->num_temporal_layers ? pic->num_temporal_layers : 1;
+      enc->enc_pic.temporal_id = 0;
       enc->enc_pic.rc_layer_init.target_bit_rate = pic->rate_ctrl.target_bitrate;
       enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rate_ctrl.peak_bitrate;
       enc->enc_pic.rc_layer_init.frame_rate_num = pic->rate_ctrl.frame_rate_num;
@@ -520,6 +523,7 @@ void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned int value,
                                 unsigned int num_bits)
 {
    unsigned int bits_to_pack = 0;
+   enc->bits_size += num_bits;
 
    while (num_bits > 0) {
       unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits));
@@ -552,6 +556,7 @@ void radeon_enc_reset(struct radeon_encoder *enc)
    enc->bits_output = 0;
    enc->num_zeros = 0;
    enc->byte_index = 0;
+   enc->bits_size = 0;
 }
 
 void radeon_enc_byte_align(struct radeon_encoder *enc)
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h b/src/gallium/drivers/radeon/radeon_vcn_enc.h
index a11682fb3bd..b568cfd7afc 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.h
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
@@ -71,6 +71,7 @@
 #define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS 0x00000003
 #define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX 0x00000004
 #define RENCODE_DIRECT_OUTPUT_NALU_TYPE_END_OF_SEQUENCE 0x00000005
+#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI 0x00000006
 
 #define RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
 #define RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
@@ -438,6 +439,9 @@ struct radeon_enc_pic {
    unsigned bit_depth_chroma_minus8;
    unsigned nal_unit_type;
    unsigned max_num_merge_cand;
+   unsigned temporal_id;
+   unsigned num_temporal_layers;
+   unsigned temporal_layer_pattern_index;
 
    bool not_referenced;
    bool is_idr;
@@ -490,6 +494,8 @@ struct radeon_encoder {
    void (*nalu_pps)(struct radeon_encoder *enc);
    void (*nalu_vps)(struct radeon_encoder *enc);
    void (*nalu_aud)(struct radeon_encoder *enc);
+   void (*nalu_sei)(struct radeon_encoder *enc);
+   void (*nalu_prefix)(struct radeon_encoder *enc);
    void (*slice_header)(struct radeon_encoder *enc);
    void (*ctx)(struct radeon_encoder *enc);
    void (*bitstream)(struct radeon_encoder *enc);
@@ -537,6 +543,7 @@ struct radeon_encoder {
    unsigned num_zeros;
    unsigned byte_index;
    unsigned bits_output;
+   unsigned bits_size;
    uint32_t total_task_size;
    uint32_t *p_task_size;
 
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index 6704b3075b2..5db22c1c977 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -28,6 +28,7 @@
 #include "pipe/p_video_codec.h"
 #include "radeon_vcn_enc.h"
 #include "radeon_video.h"
+#include "radeon_temporal.h"
 #include "si_pipe.h"
 #include "util/u_video.h"
 
@@ -135,8 +136,8 @@ static void radeon_enc_session_init_hevc(struct radeon_encoder *enc)
 
 static void radeon_enc_layer_control(struct radeon_encoder *enc)
 {
-   enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
-   enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+   enc->enc_pic.layer_ctrl.max_num_temporal_layers = enc->enc_pic.num_temporal_layers;
+   enc->enc_pic.layer_ctrl.num_temporal_layers = enc->enc_pic.num_temporal_layers;
 
    RADEON_ENC_BEGIN(enc->cmd.layer_control);
    RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
@@ -146,7 +147,7 @@ static void radeon_enc_layer_control(struct radeon_encoder *enc)
 
 static void radeon_enc_layer_select(struct radeon_encoder *enc)
 {
-   enc->enc_pic.layer_sel.temporal_layer_index = 0;
+   enc->enc_pic.layer_sel.temporal_layer_index = enc->enc_pic.temporal_id;
 
    RADEON_ENC_BEGIN(enc->cmd.layer_select);
    RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
@@ -458,6 +459,180 @@ static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc)
    RADEON_ENC_END();
 }
 
+/*
+ * Emit the prefix NAL unit (nal_unit_type 14) for the current picture. It
+ * carries the temporal_id of the current temporal layer pattern entry; the
+ * pattern index restarts at 0 when pic_order_cnt is 0 and otherwise cycles
+ * through entries 1 .. pattern_size - 1.
+ */
+static void radeon_enc_nalu_prefix(struct radeon_encoder *enc)
+{
+   uint nalRefIdc = enc->enc_pic.is_idr ? 3 : 0;
+
+   /* The pattern tables are indexed by (number of temporal layers - 1). */
+   rvcn_temporal_layer_pattern_table_t table_info;
+   table_info = rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1];
+
+   if (enc->enc_pic.pic_order_cnt == 0)
+      enc->enc_pic.temporal_layer_pattern_index = 0;
+   else if (enc->enc_pic.temporal_layer_pattern_index == (table_info.pattern_size - 1))
+      enc->enc_pic.temporal_layer_pattern_index = 1;
+   else
+      enc->enc_pic.temporal_layer_pattern_index++;
+
+   rvcn_temporal_layer_pattern_entry_t pattern =
+      table_info.pattern_table[enc->enc_pic.temporal_layer_pattern_index];
+
+   RADEON_ENC_BEGIN(enc->cmd.nalu);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX);
+   uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++];
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+   radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, nalRefIdc, 2);
+   radeon_enc_code_fixed_bits(enc, 14, 5);
+   radeon_enc_byte_align(enc);
+   radeon_enc_set_emulation_prevention(enc, true);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.is_idr ? 0x1 : 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 6);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 3);
+   radeon_enc_code_fixed_bits(enc, 0x0, 4);
+   radeon_enc_code_fixed_bits(enc, pattern.temporal_id, 3);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x3, 2);
+
+   if (nalRefIdc != 0)
+   {
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x1, 1);
+      radeon_enc_byte_align(enc);
+   }
+
+   radeon_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
+/*
+ * Emit an SEI NAL unit with payload type 24 that lists every entry of the
+ * temporal layer pattern in use. The payload size byte is first written as a
+ * placeholder and patched once the payload has been encoded.
+ */
+static void radeon_enc_nalu_sei(struct radeon_encoder *enc)
+{
+   unsigned number_of_layers;
+
+   rvcn_temporal_layer_pattern_table_t table_info;
+   table_info = rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1];
+   number_of_layers = table_info.pattern_size;
+
+   RADEON_ENC_BEGIN(enc->cmd.nalu);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI);
+   uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++];
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+
+   radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
+   radeon_enc_code_fixed_bits(enc, 0x6, 8);
+   radeon_enc_byte_align(enc);
+
+   radeon_enc_set_emulation_prevention(enc, true);
+
+   /* save the current position for later */
+   unsigned position = enc->cs.current.cdw;
+   unsigned shifter = enc->shifter;
+   unsigned bits_in_shifter = enc->bits_in_shifter;
+   unsigned num_zeros = enc->num_zeros;
+   unsigned byte_index = enc->byte_index;
+   unsigned bits_output = enc->bits_output;
+   bool emulation_prevention = enc->emulation_prevention;
+
+   /* temporarily fill out the payload type and size */
+   radeon_enc_code_fixed_bits(enc, 24, 8);
+   radeon_enc_code_fixed_bits(enc, 0, 8);
+
+   unsigned svc_start_offset = enc->bits_size;
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_ue(enc, number_of_layers - 1);
+
+   for (unsigned i = 0; i < number_of_layers; i++)
+   {
+      rvcn_temporal_layer_pattern_entry_t pattern = table_info.pattern_table[i];
+      radeon_enc_code_ue(enc, i);
+      radeon_enc_code_fixed_bits(enc, 0x0, 6);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 3);
+      radeon_enc_code_fixed_bits(enc, 0x0, 4);
+      radeon_enc_code_fixed_bits(enc, pattern.temporal_id, 3);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_ue(enc, 0);
+      radeon_enc_code_ue(enc, 0);
+   }
+   unsigned svc_size = ((enc->bits_size - svc_start_offset) + 7) / 8;
+   unsigned aligned = (32 - enc->bits_in_shifter) % 8;
+   if (aligned > 0)
+      radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_byte_align(enc);
+
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_byte_align(enc);
+
+   /* store our current state, and go to the beginning to write the size */
+   unsigned position2 = enc->cs.current.cdw;
+   unsigned shifter2 = enc->shifter;
+   unsigned bits_in_shifter2 = enc->bits_in_shifter;
+   unsigned num_zeros2 = enc->num_zeros;
+   unsigned byte_index2 = enc->byte_index;
+   unsigned bits_output2 = enc->bits_output;
+   bool emulation_prevention2 = enc->emulation_prevention;
+
+   enc->cs.current.cdw = position;
+   enc->shifter = shifter;
+   enc->bits_in_shifter = bits_in_shifter;
+   enc->num_zeros = num_zeros;
+   enc->byte_index = byte_index;
+   enc->bits_output = bits_output;
+   enc->emulation_prevention = emulation_prevention;
+
+   radeon_enc_output_one_byte(enc, 24);
+   radeon_enc_output_one_byte(enc, svc_size);
+
+   /* restore our state */
+   enc->cs.current.cdw = position2;
+   enc->shifter = shifter2;
+   enc->bits_in_shifter = bits_in_shifter2;
+   enc->num_zeros = num_zeros2;
+   enc->byte_index = byte_index2;
+   enc->bits_output = bits_output2;
+   enc->emulation_prevention = emulation_prevention2;
+
+   radeon_enc_flush_headers(enc);
+
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
 {
    RADEON_ENC_BEGIN(enc->cmd.nalu);
@@ -1140,7 +1315,11 @@ static void begin(struct radeon_encoder *enc)
 
 static void radeon_enc_headers_h264(struct radeon_encoder *enc)
 {
+   if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
+      enc->nalu_prefix(enc);
    if (enc->enc_pic.is_idr) {
+      if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
+         enc->nalu_sei(enc);
       enc->nalu_sps(enc);
       enc->nalu_pps(enc);
    }
@@ -1223,6 +1402,8 @@ void radeon_enc_1_2_init(struct radeon_encoder *enc)
       enc->encode_params = radeon_enc_encode_params;
       enc->encode_params_codec_spec = radeon_enc_encode_params_h264;
       enc->encode_headers = radeon_enc_headers_h264;
+      enc->nalu_prefix = radeon_enc_nalu_prefix;
+      enc->nalu_sei = radeon_enc_nalu_sei;
    } else if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
       enc->session_init = radeon_enc_session_init_hevc;
       enc->slice_control = radeon_enc_slice_control_hevc;