radeon/vcn/enc: H.264 SVC encode

Implement H.264 temporal Scalable Video Coding (SVC) for VCN devices by
sending the required parameters to the firmware and by creating the H.264
prefix NALU and SEI scalability_info headers.

Signed-off-by: Thong Thai <thong.thai@amd.com>
Reviewed-by: Boyuan Zhang <Boyuan.Zhang@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11850>
This commit is contained in:
Thong Thai 2021-07-13 11:51:39 -04:00
parent 41f4b69354
commit 51935d594e
4 changed files with 416 additions and 3 deletions

View file

@ -0,0 +1,232 @@
/**************************************************************************
*
* Copyright 2021 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef _RADEON_TEMPORAL_H
#define _RADEON_TEMPORAL_H
#include "radeon_video.h"
/* Number of pattern tables below: one per supported temporal layer count. */
#define RENCODE_MAX_NUM_TEMPORAL_LAYERS 4
/* Capacity of a single pattern table; the 4-layer table uses all 9 slots. */
#define RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE 9

/*
 * One step of a repeating temporal-layer frame pattern.
 *
 * NOTE(review): only temporal_id is consumed by the encoder code visible in
 * this change; the remaining field descriptions are inferred from the names
 * and should be confirmed against the firmware interface.
 */
struct rvcn_temporal_layer_pattern_entry_s
{
   unsigned temporal_id;              /* temporal layer the frame belongs to */
   unsigned reference_index_in_table; /* presumably the pattern entry used as reference */
   bool reference_modification;       /* presumably requests reference list modification */
   unsigned frame_num_offset;         /* presumably frame_num relative to pattern start */
   unsigned poc_offset;               /* presumably picture order count relative to pattern start */
   bool mark_as_reference;            /* presumably whether the frame is kept as a reference */
};
typedef struct rvcn_temporal_layer_pattern_entry_s rvcn_temporal_layer_pattern_entry_t;

/* A frame pattern: pattern_size valid entries at the front of pattern_table. */
struct rvcn_temporal_layer_pattern_table_s
{
   unsigned pattern_size;
   rvcn_temporal_layer_pattern_entry_t pattern_table[RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE];
};
typedef struct rvcn_temporal_layer_pattern_table_s rvcn_temporal_layer_pattern_table_t;

/*
 * Frame patterns indexed by (number of temporal layers - 1).
 * Slots past pattern_size are left zero-initialized.
 */
static const rvcn_temporal_layer_pattern_table_t
rvcn_temporal_layer_pattern_tables[RENCODE_MAX_NUM_TEMPORAL_LAYERS] =
{
   /* 1 temporal layer */
   [0] = {
      .pattern_size = 2,
      .pattern_table = {
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 0, .poc_offset = 0, .mark_as_reference = true },
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 1, .poc_offset = 2, .mark_as_reference = true },
      },
   },
   /* 2 temporal layers */
   [1] = {
      .pattern_size = 3,
      .pattern_table = {
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 0, .poc_offset = 0, .mark_as_reference = true },
         { .temporal_id = 1, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 1, .poc_offset = 2, .mark_as_reference = false },
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 1, .poc_offset = 4, .mark_as_reference = true },
      },
   },
   /* 3 temporal layers */
   [2] = {
      .pattern_size = 5,
      .pattern_table = {
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 0, .poc_offset = 0, .mark_as_reference = true },
         { .temporal_id = 2, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 1, .poc_offset = 2, .mark_as_reference = false },
         { .temporal_id = 1, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 1, .poc_offset = 4, .mark_as_reference = true },
         { .temporal_id = 2, .reference_index_in_table = 2, .reference_modification = false,
           .frame_num_offset = 2, .poc_offset = 6, .mark_as_reference = false },
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = true,
           .frame_num_offset = 2, .poc_offset = 8, .mark_as_reference = true },
      },
   },
   /* 4 temporal layers */
   [3] = {
      .pattern_size = 9,
      .pattern_table = {
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 0, .poc_offset = 0, .mark_as_reference = true },
         { .temporal_id = 3, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 1, .poc_offset = 2, .mark_as_reference = false },
         { .temporal_id = 2, .reference_index_in_table = 0, .reference_modification = false,
           .frame_num_offset = 1, .poc_offset = 4, .mark_as_reference = true },
         { .temporal_id = 3, .reference_index_in_table = 2, .reference_modification = false,
           .frame_num_offset = 2, .poc_offset = 6, .mark_as_reference = false },
         { .temporal_id = 1, .reference_index_in_table = 0, .reference_modification = true,
           .frame_num_offset = 2, .poc_offset = 8, .mark_as_reference = true },
         { .temporal_id = 3, .reference_index_in_table = 4, .reference_modification = false,
           .frame_num_offset = 3, .poc_offset = 10, .mark_as_reference = false },
         { .temporal_id = 2, .reference_index_in_table = 4, .reference_modification = false,
           .frame_num_offset = 3, .poc_offset = 12, .mark_as_reference = true },
         { .temporal_id = 3, .reference_index_in_table = 6, .reference_modification = false,
           .frame_num_offset = 4, .poc_offset = 14, .mark_as_reference = false },
         { .temporal_id = 0, .reference_index_in_table = 0, .reference_modification = true,
           .frame_num_offset = 4, .poc_offset = 16, .mark_as_reference = true },
      },
   },
};
#endif // _RADEON_TEMPORAL_H

View file

@ -61,6 +61,8 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic
enc->enc_pic.crop_top = 0;
enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2;
}
enc->enc_pic.num_temporal_layers = pic->num_temporal_layers;
enc->enc_pic.temporal_id = 0;
enc->enc_pic.rc_layer_init.target_bit_rate = pic->rate_ctrl.target_bitrate;
enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rate_ctrl.peak_bitrate;
enc->enc_pic.rc_layer_init.frame_rate_num = pic->rate_ctrl.frame_rate_num;
@ -95,6 +97,7 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic
default:
enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE;
}
enc->enc_pic.num_temporal_layers = pic->num_temporal_layers;
} else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture;
enc->enc_pic.picture_type = pic->picture_type;
@ -520,6 +523,7 @@ void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned int value,
unsigned int num_bits)
{
unsigned int bits_to_pack = 0;
enc->bits_size += num_bits;
while (num_bits > 0) {
unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits));
@ -552,6 +556,7 @@ void radeon_enc_reset(struct radeon_encoder *enc)
enc->bits_output = 0;
enc->num_zeros = 0;
enc->byte_index = 0;
enc->bits_size = 0;
}
void radeon_enc_byte_align(struct radeon_encoder *enc)

View file

@ -71,6 +71,7 @@
#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS 0x00000003
#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX 0x00000004
#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_END_OF_SEQUENCE 0x00000005
#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI 0x00000006
#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
@ -438,6 +439,9 @@ struct radeon_enc_pic {
unsigned bit_depth_chroma_minus8;
unsigned nal_unit_type;
unsigned max_num_merge_cand;
unsigned temporal_id;
unsigned num_temporal_layers;
unsigned temporal_layer_pattern_index;
bool not_referenced;
bool is_idr;
@ -490,6 +494,8 @@ struct radeon_encoder {
void (*nalu_pps)(struct radeon_encoder *enc);
void (*nalu_vps)(struct radeon_encoder *enc);
void (*nalu_aud)(struct radeon_encoder *enc);
void (*nalu_sei)(struct radeon_encoder *enc);
void (*nalu_prefix)(struct radeon_encoder *enc);
void (*slice_header)(struct radeon_encoder *enc);
void (*ctx)(struct radeon_encoder *enc);
void (*bitstream)(struct radeon_encoder *enc);
@ -537,6 +543,7 @@ struct radeon_encoder {
unsigned num_zeros;
unsigned byte_index;
unsigned bits_output;
unsigned bits_size;
uint32_t total_task_size;
uint32_t *p_task_size;

View file

@ -28,6 +28,7 @@
#include "pipe/p_video_codec.h"
#include "radeon_vcn_enc.h"
#include "radeon_video.h"
#include "radeon_temporal.h"
#include "si_pipe.h"
#include "util/u_video.h"
@ -135,8 +136,8 @@ static void radeon_enc_session_init_hevc(struct radeon_encoder *enc)
static void radeon_enc_layer_control(struct radeon_encoder *enc)
{
enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
enc->enc_pic.layer_ctrl.max_num_temporal_layers = enc->enc_pic.num_temporal_layers;
enc->enc_pic.layer_ctrl.num_temporal_layers = enc->enc_pic.num_temporal_layers;
RADEON_ENC_BEGIN(enc->cmd.layer_control);
RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
@ -146,7 +147,7 @@ static void radeon_enc_layer_control(struct radeon_encoder *enc)
static void radeon_enc_layer_select(struct radeon_encoder *enc)
{
enc->enc_pic.layer_sel.temporal_layer_index = 0;
enc->enc_pic.layer_sel.temporal_layer_index = enc->enc_pic.temporal_id;
RADEON_ENC_BEGIN(enc->cmd.layer_select);
RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
@ -458,6 +459,168 @@ static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc)
RADEON_ENC_END();
}
/**
 * Emit the H.264 SVC prefix NAL unit (nal_unit_type 14) for the current
 * picture as a direct-output NALU command.
 *
 * Steps enc->enc_pic.temporal_layer_pattern_index through the frame pattern
 * for the configured number of temporal layers and writes the temporal_id of
 * the selected pattern entry into the NAL unit header extension.
 *
 * radeon_enc_headers_h264() calls this only when more than one temporal layer
 * is configured.
 *
 * NOTE(review): the syntax element names in the comments below follow
 * H.264 Annex G (nal_unit_header_svc_extension / prefix_nal_unit_svc);
 * confirm against the specification.
 */
static void radeon_enc_nalu_prefix(struct radeon_encoder *enc)
{
   unsigned nalRefIdc = enc->enc_pic.is_idr ? 3 : 0;

   /* The pattern tables are indexed by (number of temporal layers - 1):
    * slot 0 holds the single-layer pattern and there are only
    * RENCODE_MAX_NUM_TEMPORAL_LAYERS slots.  Indexing with the raw layer
    * count selects the wrong pattern and reads past the array for the
    * maximum layer count. */
   rvcn_temporal_layer_pattern_table_t table_info;
   table_info = rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1];

   /* Entry 0 is used only for the picture with pic_order_cnt == 0; after the
    * last entry the pattern wraps around to entry 1, not entry 0. */
   if (enc->enc_pic.pic_order_cnt == 0)
      enc->enc_pic.temporal_layer_pattern_index = 0;
   else if (enc->enc_pic.temporal_layer_pattern_index == (table_info.pattern_size - 1))
      enc->enc_pic.temporal_layer_pattern_index = 1;
   else
      enc->enc_pic.temporal_layer_pattern_index++;

   rvcn_temporal_layer_pattern_entry_t pattern =
      table_info.pattern_table[enc->enc_pic.temporal_layer_pattern_index];

   RADEON_ENC_BEGIN(enc->cmd.nalu);
   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX);
   /* Reserve one dword in the command stream; it receives the header size in
    * bytes once the bits have been flushed. */
   uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++];
   radeon_enc_reset(enc);

   /* Start code and one-byte NAL header, written without emulation prevention. */
   radeon_enc_set_emulation_prevention(enc, false);
   radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
   radeon_enc_code_fixed_bits(enc, 0x0, 1);       /* forbidden_zero_bit */
   radeon_enc_code_fixed_bits(enc, nalRefIdc, 2); /* nal_ref_idc */
   radeon_enc_code_fixed_bits(enc, 14, 5);        /* nal_unit_type: prefix NAL unit */
   radeon_enc_byte_align(enc);

   /* 24-bit header extension, written with emulation prevention enabled. */
   radeon_enc_set_emulation_prevention(enc, true);
   radeon_enc_code_fixed_bits(enc, 0x1, 1);                             /* svc_extension_flag */
   radeon_enc_code_fixed_bits(enc, enc->enc_pic.is_idr ? 0x1 : 0x0, 1); /* idr_flag */
   radeon_enc_code_fixed_bits(enc, 0x0, 6);                             /* priority_id */
   radeon_enc_code_fixed_bits(enc, 0x1, 1);                             /* no_inter_layer_pred_flag */
   radeon_enc_code_fixed_bits(enc, 0x0, 3);                             /* dependency_id */
   radeon_enc_code_fixed_bits(enc, 0x0, 4);                             /* quality_id */
   radeon_enc_code_fixed_bits(enc, pattern.temporal_id, 3);             /* temporal_id */
   radeon_enc_code_fixed_bits(enc, 0x0, 1);                             /* use_ref_base_pic_flag */
   radeon_enc_code_fixed_bits(enc, 0x0, 1);                             /* discardable_flag */
   radeon_enc_code_fixed_bits(enc, 0x0, 1);                             /* output_flag */
   radeon_enc_code_fixed_bits(enc, 0x3, 2);                             /* reserved_three_2bits */

   /* A payload follows the header only when nal_ref_idc is non-zero. */
   if (nalRefIdc != 0) {
      radeon_enc_code_fixed_bits(enc, 0x0, 1); /* store_ref_base_pic_flag */
      radeon_enc_code_fixed_bits(enc, 0x0, 1); /* additional_prefix_nal_unit_extension_flag */
      radeon_enc_code_fixed_bits(enc, 0x1, 1); /* trailing stop bit */
      radeon_enc_byte_align(enc);
   }

   radeon_enc_flush_headers(enc);
   *size_in_bytes = (enc->bits_output + 7) / 8;
   RADEON_ENC_END();
}
/**
 * Emit an SEI NAL unit carrying a payload of type 24 (scalability_info per
 * the commit description) as a direct-output NALU command.
 *
 * One record is written per entry of the frame pattern for the configured
 * number of temporal layers; every field except the record index and the
 * temporal_id is written as zero.
 *
 * The payload size is not known until the payload has been written, so a
 * placeholder type/size pair is emitted first and overwritten afterwards by
 * rewinding the bitstream writer to the saved position.
 */
static void radeon_enc_nalu_sei(struct radeon_encoder *enc)
{
   /* Snapshot of every piece of bitstream writer state that has to be
    * rewound (and later restored) to patch the payload size in place. */
   struct {
      unsigned cdw;
      unsigned shifter;
      unsigned bits_in_shifter;
      unsigned num_zeros;
      unsigned byte_index;
      unsigned bits_output;
      bool emulation_prevention;
   } payload_hdr, stream_end;

   const rvcn_temporal_layer_pattern_table_t *table =
      &rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1];
   const unsigned layer_count = table->pattern_size;

   RADEON_ENC_BEGIN(enc->cmd.nalu);
   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI);
   /* Dword reserved for the total header size in bytes, filled in last. */
   unsigned *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++];
   radeon_enc_reset(enc);

   /* Start code and NAL header byte (value 6), without emulation prevention. */
   radeon_enc_set_emulation_prevention(enc, false);
   radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
   radeon_enc_code_fixed_bits(enc, 0x6, 8);
   radeon_enc_byte_align(enc);

   radeon_enc_set_emulation_prevention(enc, true);

   /* Remember where the payload type/size bytes start. */
   payload_hdr.cdw = enc->cs.current.cdw;
   payload_hdr.shifter = enc->shifter;
   payload_hdr.bits_in_shifter = enc->bits_in_shifter;
   payload_hdr.num_zeros = enc->num_zeros;
   payload_hdr.byte_index = enc->byte_index;
   payload_hdr.bits_output = enc->bits_output;
   payload_hdr.emulation_prevention = enc->emulation_prevention;

   /* Placeholder payload type and size; patched once the size is known. */
   radeon_enc_code_fixed_bits(enc, 24, 8);
   radeon_enc_code_fixed_bits(enc, 0, 8);

   const unsigned payload_first_bit = enc->bits_size;

   /* Three single-bit flags, all zero, followed by the record count - 1. */
   for (unsigned flag = 0; flag < 3; flag++)
      radeon_enc_code_fixed_bits(enc, 0x0, 1);
   radeon_enc_code_ue(enc, layer_count - 1);

   for (unsigned layer = 0; layer < layer_count; layer++) {
      radeon_enc_code_ue(enc, layer);
      radeon_enc_code_fixed_bits(enc, 0x0, 6);
      radeon_enc_code_fixed_bits(enc, 0x0, 1);
      radeon_enc_code_fixed_bits(enc, 0x0, 3);
      radeon_enc_code_fixed_bits(enc, 0x0, 4);
      radeon_enc_code_fixed_bits(enc, table->pattern_table[layer].temporal_id, 3);

      /* Thirteen single-bit fields, all written as zero. */
      for (unsigned flag = 0; flag < 13; flag++)
         radeon_enc_code_fixed_bits(enc, 0x0, 1);

      radeon_enc_code_ue(enc, 0);
      radeon_enc_code_ue(enc, 0);
   }

   /* Payload size in bytes, rounded up, measured before any padding. */
   const unsigned payload_bytes = ((enc->bits_size - payload_first_bit) + 7) / 8;

   /* If the payload stops mid-byte, terminate it with a one bit and pad. */
   if ((32 - enc->bits_in_shifter) % 8 > 0)
      radeon_enc_code_fixed_bits(enc, 0x1, 1);
   radeon_enc_byte_align(enc);

   /* Trailing stop bit, padded to a byte boundary. */
   radeon_enc_code_fixed_bits(enc, 0x1, 1);
   radeon_enc_byte_align(enc);

   /* Save the end-of-stream state, then rewind to the payload header. */
   stream_end.cdw = enc->cs.current.cdw;
   stream_end.shifter = enc->shifter;
   stream_end.bits_in_shifter = enc->bits_in_shifter;
   stream_end.num_zeros = enc->num_zeros;
   stream_end.byte_index = enc->byte_index;
   stream_end.bits_output = enc->bits_output;
   stream_end.emulation_prevention = enc->emulation_prevention;

   enc->cs.current.cdw = payload_hdr.cdw;
   enc->shifter = payload_hdr.shifter;
   enc->bits_in_shifter = payload_hdr.bits_in_shifter;
   enc->num_zeros = payload_hdr.num_zeros;
   enc->byte_index = payload_hdr.byte_index;
   enc->bits_output = payload_hdr.bits_output;
   enc->emulation_prevention = payload_hdr.emulation_prevention;

   /* Overwrite the placeholder with the real payload type and size. */
   radeon_enc_output_one_byte(enc, 24);
   radeon_enc_output_one_byte(enc, payload_bytes);

   /* Jump back to the end of the stream and finish the header. */
   enc->cs.current.cdw = stream_end.cdw;
   enc->shifter = stream_end.shifter;
   enc->bits_in_shifter = stream_end.bits_in_shifter;
   enc->num_zeros = stream_end.num_zeros;
   enc->byte_index = stream_end.byte_index;
   enc->bits_output = stream_end.bits_output;
   enc->emulation_prevention = stream_end.emulation_prevention;

   radeon_enc_flush_headers(enc);
   *size_in_bytes = (enc->bits_output + 7) / 8;
   RADEON_ENC_END();
}
static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
{
RADEON_ENC_BEGIN(enc->cmd.nalu);
@ -1140,7 +1303,11 @@ static void begin(struct radeon_encoder *enc)
static void radeon_enc_headers_h264(struct radeon_encoder *enc)
{
if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
enc->nalu_prefix(enc);
if (enc->enc_pic.is_idr) {
if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
enc->nalu_sei(enc);
enc->nalu_sps(enc);
enc->nalu_pps(enc);
}
@ -1223,6 +1390,8 @@ void radeon_enc_1_2_init(struct radeon_encoder *enc)
enc->encode_params = radeon_enc_encode_params;
enc->encode_params_codec_spec = radeon_enc_encode_params_h264;
enc->encode_headers = radeon_enc_headers_h264;
enc->nalu_prefix = radeon_enc_nalu_prefix;
enc->nalu_sei = radeon_enc_nalu_sei;
} else if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
enc->session_init = radeon_enc_session_init_hevc;
enc->slice_control = radeon_enc_slice_control_hevc;