diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 494ad436cbf..297b0f0ac81 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -577,6 +577,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .KHR_video_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), .KHR_video_decode_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), + .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE), .KHR_vulkan_memory_model = true, .KHR_workgroup_memory_explicit_layout = true, .KHR_zero_initialize_workgroup_memory = true, @@ -2973,7 +2974,7 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext; if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) - prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR; + prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR | VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR; break; } default: diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index 0fae896dc15..921f554ec3d 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -32,6 +32,7 @@ #include "ac_uvd_dec.h" #define NUM_H264_REFS 17 +#define NUM_H265_REFS 8 #define FB_BUFFER_OFFSET 0x1000 #define FB_BUFFER_SIZE 2048 #define FB_BUFFER_SIZE_TONGA (2048 * 64) @@ -138,6 +139,56 @@ static unsigned calc_ctx_size_h264_perf(struct radv_video_session *vid) return ctx_size; } +static unsigned calc_ctx_size_h265_main(struct radv_video_session *vid) +{ + unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = vid->vk.max_dpb_slots + 1; + + if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align(width, 16); + height = align(height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; +} + +static unsigned calc_ctx_size_h265_main10(struct radv_video_session *vid) +{ + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + + unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = 2; + + unsigned max_references = vid->vk.max_dpb_slots + 1; + + if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + /* 64x64 is the maximum ctb size. */ + log2_ctb_size = 6; + + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned)ceil(height * 8 / 2048.0); + + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; +} + VkResult radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo, @@ -167,16 +218,20 @@ radv_CreateVideoSessionKHR(VkDevice _device, case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: vid->stream_type = RDECODE_CODEC_H264_PERF; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: + vid->stream_type = RDECODE_CODEC_H265; + break; default: return VK_ERROR_FEATURE_NOT_PRESENT; } vid->stream_handle = si_vid_alloc_stream_handle(); - vid->dbg_frame_cnt = 0; vid->dpb_type = DPB_MAX_RES; vid->db_alignment = (device->physical_device->rad_info.family >= CHIP_RENOIR && - vid->vk.max_coded.width > 32 && 0) ? 64 : 32; + vid->vk.max_coded.width > 32 && + (vid->stream_type == RDECODE_CODEC_H265 && + vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) ? 64 : 32; *pVideoSession = radv_video_session_to_handle(vid); return VK_SUCCESS; @@ -249,6 +304,9 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: cap = &pdevice->rad_info.dec_caps.codec_info[RADV_VIDEO_FORMAT_MPEG4_AVC]; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: + cap = &pdevice->rad_info.dec_caps.codec_info[RADV_VIDEO_FORMAT_HEVC]; + break; default: unreachable("unsupported operation"); } @@ -283,6 +341,16 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION; break; } + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { + struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *) + vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR); + pCapabilities->maxDpbSlots = NUM_H264_REFS; + pCapabilities->maxActiveReferencePictures = NUM_H265_REFS; + ext->maxLevelIdc = 51; + strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME); + pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION; + break; + } default: break; } @@ -323,13 +391,37 @@ radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice, *pVideoFormatPropertyCount = 1; + bool need_10bit = false; + const struct VkVideoProfileListInfoKHR *prof_list = (struct VkVideoProfileListInfoKHR *) + vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR); + if (prof_list) { + for (unsigned i = 0; i < prof_list->profileCount; i++) { + const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i]; + if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR) + need_10bit = true; + } + } + + (*pVideoFormatPropertyCount)++; + if (need_10bit) + (*pVideoFormatPropertyCount)++; + if (!pVideoFormatProperties) return VK_SUCCESS; - pVideoFormatProperties[0].format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; - pVideoFormatProperties[0].imageType = VK_IMAGE_TYPE_2D; - pVideoFormatProperties[0].imageTiling = VK_IMAGE_TILING_OPTIMAL; - pVideoFormatProperties[0].imageUsageFlags = pVideoFormatInfo->imageUsage; + int idx = 0; + pVideoFormatProperties[idx].format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + pVideoFormatProperties[idx].imageType = VK_IMAGE_TYPE_2D; + pVideoFormatProperties[idx].imageTiling = VK_IMAGE_TILING_OPTIMAL; + pVideoFormatProperties[idx].imageUsageFlags = pVideoFormatInfo->imageUsage; + idx++; + if (need_10bit) { + pVideoFormatProperties[idx].format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16; + pVideoFormatProperties[idx].imageType = VK_IMAGE_TYPE_2D; + pVideoFormatProperties[idx].imageTiling = VK_IMAGE_TILING_OPTIMAL; + pVideoFormatProperties[idx].imageUsageFlags = pVideoFormatInfo->imageUsage; + idx++; + } return VK_SUCCESS; } @@ -351,7 +443,7 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, if (device->physical_device->rad_info.family >= CHIP_POLARIS10) num_memory_reqs++; - if (vid->stream_type == RDECODE_CODEC_H264_PERF) + if (vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265) num_memory_reqs++; *pMemoryRequirementsCount = num_memory_reqs; @@ -375,7 +467,18 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, pMemoryRequirements[idx].memoryRequirements.alignment = 0; pMemoryRequirements[idx].memoryRequirements.memoryTypeBits = memory_type_bits; } + if (vid->stream_type == RDECODE_CODEC_H265) { + uint32_t ctx_size; + if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(vid); + else + ctx_size = calc_ctx_size_h265_main(vid); + pMemoryRequirements[idx].memoryBindIndex = RADV_BIND_DECODER_CTX; + pMemoryRequirements[idx].memoryRequirements.size = align(ctx_size, 4096); + pMemoryRequirements[idx].memoryRequirements.alignment = 0; + pMemoryRequirements[idx].memoryRequirements.memoryTypeBits = memory_type_bits; + } return VK_SUCCESS; } @@ -587,7 +690,159 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, return result; } -static bool rvcn_dec_message_decode(struct radv_video_session *vid, +static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, + struct radv_video_session *vid, + struct radv_video_session_params *params, + const struct VkVideoDecodeInfoKHR *frame_info, + void *it_ptr) +{ + rvcn_dec_message_hevc_t result; + int i, j; + const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info = + vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR); + memset(&result, 0, sizeof(result)); + + const StdVideoH265SequenceParameterSet *sps = vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->sps_video_parameter_set_id); + const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id); + + result.sps_info_flags = 0; + result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0; + result.sps_info_flags |= sps->flags.amp_enabled_flag << 1; + result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3; + result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8; + + if (device->physical_device->rad_info.family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + + result.chroma_format = sps->chroma_format_idc; + result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[0]; + result.log2_min_luma_coding_block_size_minus3 = + sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = + sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = + sps->log2_min_luma_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = + sps->log2_diff_max_min_luma_transform_block_size; + result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; + if (sps->flags.pcm_enabled_flag) { + result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size; + } + result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pps->flags.output_flag_present_flag << 1; + result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3; + result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4; + result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5; + result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pps->flags.weighted_pred_flag << 8; + result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9; + result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11; + result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13; + result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18; + result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19; + + result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx; + result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal; + + memset(result.poc_list, 0, 16 * sizeof(int)); + memset(result.ref_pic_list, 0x7f, 16); + for (i = 0; i < frame_info->referenceSlotCount; i++) { + const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot = + vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR); + int idx = frame_info->pReferenceSlots[i].slotIndex; + result.poc_list[idx] = dpb_slot->pStdReferenceInfo->PicOrderCntVal; + result.ref_pic_list[idx] = idx; + } + result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex; + + for (i = 0; i < 8; ++i) + result.ref_pic_set_st_curr_before[i] = h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]; + + for (i = 0; i < 8; ++i) + result.ref_pic_set_st_curr_after[i] = h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]; + + for (i = 0; i < 8; ++i) + result.ref_pic_set_lt_curr[i] = h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]; + + if (sps->flags.sps_scaling_list_data_present_flag) { + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = sps->pScalingLists->ScalingListDCCoef16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = sps->pScalingLists->ScalingListDCCoef32x32[i]; + + memcpy(it_ptr, sps->pScalingLists->ScalingList4x4, 6 * 16); + memcpy((char *)it_ptr + 96, sps->pScalingLists->ScalingList8x8, 6 * 64); + memcpy((char *)it_ptr + 480, sps->pScalingLists->ScalingList16x16, 6 * 64); + memcpy((char *)it_ptr + 864, sps->pScalingLists->ScalingList32x32, 2 * 64); + } + + for (i = 0; i < 2; i++) { + for (j = 0; j < 15; j++) + result.direct_reflist[i][j] = 0xff;//pic->RefPicList[i][j]; + } + + if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) { + if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.p010_mode = 0; + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.hevc_reserved[0] = 4; /* sclr_luma10to8 */ + result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */ + } + } + + return result; +} + +static bool rvcn_dec_message_decode(struct radv_device *device, + struct radv_video_session *vid, struct radv_video_session_params *params, void *ptr, void *it_ptr, @@ -655,6 +910,11 @@ static bool rvcn_dec_message_decode(struct radv_video_session *vid, decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE; decode->db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment); + if (device->physical_device->rad_info.family >= CHIP_NAVI21 && + (vid->stream_type == RDECODE_CODEC_H265 && + vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) + decode->db_aligned_height = align(frame_info->dstPictureResource.codedExtent.height, 64); + decode->db_surf_tile_config = 0; decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; @@ -693,6 +953,12 @@ static bool rvcn_dec_message_decode(struct radv_video_session *vid, index_codec->message_id = RDECODE_MESSAGE_AVC; break; } + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { + rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info, it_ptr); + memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t)); + index_codec->message_id = RDECODE_MESSAGE_HEVC; + break; + } default: unreachable("unknown operation"); } @@ -1091,6 +1357,9 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: size += sizeof(rvcn_dec_message_avc_t); break; + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: + size += sizeof(rvcn_dec_message_hevc_t); + break; default: unreachable("unsupported codec."); } @@ -1109,7 +1378,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, msg_bo = cmd_buffer->upload.upload_bo; uint32_t slice_offset; - rvcn_dec_message_decode(vid, params, ptr, it_ptr, &slice_offset, frame_info); + rvcn_dec_message_decode(cmd_buffer->device, vid, params, ptr, it_ptr, &slice_offset, frame_info); rvcn_dec_message_feedback(fb_ptr); send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset); send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);