From b5963fc1f00a66f44984abde1d807fdd97df5f06 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 25 May 2023 11:33:49 +1000 Subject: [PATCH] radv/video: fix h264/265 dpb usage. This seems to be the best compromise I can come up with so far. I can't figure out how to get the tier2 programming to match between 264 and 265, maybe they are just programmed differently here, good old firmware. Fixes: 1693c03a3963 ("radv/video: add initial h264 decoder for VCN") Reviewed-by: Lynne Part-of: --- src/amd/vulkan/radv_video.c | 51 +++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index 120abd6e761..9f72f22888d 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -829,25 +829,25 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot = vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR); - result.frame_num_list[idx] = idx; - result.field_order_cnt_list[idx][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0]; - result.field_order_cnt_list[idx][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1]; + result.frame_num_list[i] = idx; + result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0]; + result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1]; - result.ref_frame_list[idx] = idx; + result.ref_frame_list[i] = idx; if (dpb_slot->pStdReferenceInfo->flags.top_field_flag) - result.used_for_reference_flags |= (1 << (2 * idx)); + result.used_for_reference_flags |= (1 << (2 * i)); if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag) - result.used_for_reference_flags |= (1 << (2 * idx + 1)); + result.used_for_reference_flags |= (1 << (2 * i + 1)); if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag) - result.used_for_reference_flags 
|= (3 << (2 * idx)); + result.used_for_reference_flags |= (3 << (2 * i)); if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference) - result.ref_frame_list[idx] |= 0x80; + result.ref_frame_list[i] |= 0x80; if (dpb_slot->pStdReferenceInfo->flags.is_non_existing) - result.non_existing_frame_flags |= 1 << idx; + result.non_existing_frame_flags |= 1 << i; } result.curr_pic_ref_frame_num = frame_info->referenceSlotCount; @@ -1131,6 +1131,7 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, } *slice_offset = 0; + bool tier_2_use_slot = false; switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: { rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples, &decode->height_in_samples, it_ptr); @@ -1142,6 +1143,7 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info, it_ptr); memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t)); index_codec->message_id = RDECODE_MESSAGE_HEVC; + tier_2_use_slot = true; break; } default: @@ -1157,7 +1159,7 @@ static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, for (int i = 0; i < frame_info->referenceSlotCount; i++) { struct radv_image_view *f_dpb_iv = radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); struct radv_image *dpb_img = f_dpb_iv->image; - int idx = frame_info->pReferenceSlots[i].slotIndex; + int idx = tier_2_use_slot ? 
frame_info->pReferenceSlots[i].slotIndex : i; radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset; @@ -1269,6 +1271,7 @@ static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid, memset(result.scaling_list_8x8, 0x10, 2*64); } + memset(it_ptr, 0, IT_SCALING_TABLE_SIZE); memcpy(it_ptr, result.scaling_list_4x4, 6 * 16); memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64); @@ -1281,15 +1284,23 @@ static struct ruvd_h264 get_uvd_h264_msg(struct radv_video_session *vid, result.frame_num = h264_pic_info->pStdPictureInfo->frame_num; result.num_ref_frames = sps->max_num_ref_frames; + memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16); + memset(result.frame_num_list, 0, sizeof(unsigned int) * 16); for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { int idx = frame_info->pReferenceSlots[i].slotIndex; const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot = vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR); - result.frame_num_list[idx] = dpb_slot->pStdReferenceInfo->FrameNum; - result.field_order_cnt_list[idx][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0]; - result.field_order_cnt_list[idx][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1]; + result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum; + result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0]; + result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1]; + + result.ref_frame_list[i] = idx; + + if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference) + result.ref_frame_list[i] |= 0x80; } + result.curr_pic_ref_frame_num = frame_info->referenceSlotCount; result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex; return result; @@ -1340,12 +1351,14 @@ static struct ruvd_h265 get_uvd_h265_msg(struct radv_device *device, 
sps->log2_diff_max_min_luma_transform_block_size; result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; - result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1; - result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1; - result.log2_min_pcm_luma_coding_block_size_minus3 = - sps->log2_min_pcm_luma_coding_block_size_minus3; - result.log2_diff_max_min_pcm_luma_coding_block_size = - sps->log2_diff_max_min_pcm_luma_coding_block_size; + if (sps->flags.pcm_enabled_flag) { + result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = + sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = + sps->log2_diff_max_min_pcm_luma_coding_block_size; + } result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets; result.pps_info_flags = 0;