diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 64392942be6..ba867e01bcb 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -4324,10 +4324,15 @@ struct anv_vid_mem { }; #define ANV_VIDEO_MEM_REQS_H264 4 +#define ANV_VIDEO_MEM_REQS_H265 9 #define ANV_MB_WIDTH 16 #define ANV_MB_HEIGHT 16 +#define ANV_VIDEO_H264_MAX_NUM_REF_FRAME 16 +#define ANV_VIDEO_H265_MAX_NUM_REF_FRAME 16 +#define ANV_VIDEO_H265_HCP_NUM_REF_FRAME 8 +#define ANV_MAX_H265_CTB_SIZE 64 -enum { +enum anv_vid_mem_h264_types { ANV_VID_MEM_H264_INTRA_ROW_STORE, ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE, ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH, @@ -4335,11 +4340,24 @@ enum { ANV_VID_MEM_H264_MAX, }; +enum anv_vid_mem_h265_types { + ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE, + ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE, + ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN, + ANV_VID_MEM_H265_METADATA_LINE, + ANV_VID_MEM_H265_METADATA_TILE_LINE, + ANV_VID_MEM_H265_METADATA_TILE_COLUMN, + ANV_VID_MEM_H265_SAO_LINE, + ANV_VID_MEM_H265_SAO_TILE_LINE, + ANV_VID_MEM_H265_SAO_TILE_COLUMN, + ANV_VID_MEM_H265_MAX, +}; + struct anv_video_session { struct vk_video_session vk; /* the decoder needs some private memory allocations */ - struct anv_vid_mem vid_mem[ANV_VID_MEM_H264_MAX]; + struct anv_vid_mem vid_mem[ANV_VID_MEM_H265_MAX]; }; struct anv_video_session_params { diff --git a/src/intel/vulkan/anv_video.c b/src/intel/vulkan/anv_video.c index 38a3b09b2ea..09e1c5a1306 100644 --- a/src/intel/vulkan/anv_video.c +++ b/src/intel/vulkan/anv_video.c @@ -114,10 +114,6 @@ anv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, { pCapabilities->minBitstreamBufferOffsetAlignment = 32; pCapabilities->minBitstreamBufferSizeAlignment = 32; - pCapabilities->pictureAccessGranularity.width = ANV_MB_WIDTH; - pCapabilities->pictureAccessGranularity.height = ANV_MB_HEIGHT; - pCapabilities->minCodedExtent.width = ANV_MB_WIDTH; - 
pCapabilities->minCodedExtent.height = ANV_MB_HEIGHT; pCapabilities->maxCodedExtent.width = 4096; pCapabilities->maxCodedExtent.height = 4096; pCapabilities->flags = VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR; @@ -132,7 +128,11 @@ anv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *) vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR); pCapabilities->maxDpbSlots = 17; - pCapabilities->maxActiveReferencePictures = 16; + pCapabilities->maxActiveReferencePictures = ANV_VIDEO_H264_MAX_NUM_REF_FRAME; + pCapabilities->pictureAccessGranularity.width = ANV_MB_WIDTH; + pCapabilities->pictureAccessGranularity.height = ANV_MB_HEIGHT; + pCapabilities->minCodedExtent.width = ANV_MB_WIDTH; + pCapabilities->minCodedExtent.height = ANV_MB_HEIGHT; ext->fieldOffsetGranularity.x = 0; ext->fieldOffsetGranularity.y = 0; @@ -141,6 +141,23 @@ anv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION; break; } + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: { + struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *) + vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR); + + pCapabilities->pictureAccessGranularity.width = ANV_MAX_H265_CTB_SIZE; + pCapabilities->pictureAccessGranularity.height = ANV_MAX_H265_CTB_SIZE; + pCapabilities->minCodedExtent.width = ANV_MAX_H265_CTB_SIZE; + pCapabilities->minCodedExtent.height = ANV_MAX_H265_CTB_SIZE; + pCapabilities->maxDpbSlots = ANV_VIDEO_H265_MAX_NUM_REF_FRAME; + pCapabilities->maxActiveReferencePictures = ANV_VIDEO_H265_HCP_NUM_REF_FRAME; + + ext->maxLevelIdc = STD_VIDEO_H265_LEVEL_IDC_6_2; + + strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME); + 
pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION; + break; + } default: break; } @@ -196,6 +213,69 @@ get_h264_video_session_mem_reqs(struct anv_video_session *vid, mem_reqs[3].memoryRequirements.memoryTypeBits = memory_types; } +static void +get_h265_video_session_mem_reqs(struct anv_video_session *vid, + VkVideoSessionMemoryRequirementsKHR *mem_reqs, + uint32_t memory_types) +{ + /* TODO. these sizes can be determined dynamically depending on ctb sizes of each slice. */ + uint32_t size = align(vid->vk.max_coded.width, 32) >> 3; + uint32_t width_in_ctb = align(vid->vk.max_coded.width, ANV_MAX_H265_CTB_SIZE) / ANV_MAX_H265_CTB_SIZE; + uint32_t height_in_ctb = align(vid->vk.max_coded.height, ANV_MAX_H265_CTB_SIZE) / ANV_MAX_H265_CTB_SIZE; + + mem_reqs[0].memoryBindIndex = ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE; + mem_reqs[0].memoryRequirements.size = size << 6; + mem_reqs[0].memoryRequirements.alignment = 4096; + mem_reqs[0].memoryRequirements.memoryTypeBits = memory_types; + + mem_reqs[1].memoryBindIndex = ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE; + mem_reqs[1].memoryRequirements.size = size << 6; + mem_reqs[1].memoryRequirements.alignment = 4096; + mem_reqs[1].memoryRequirements.memoryTypeBits = memory_types; + + size = align(vid->vk.max_coded.height + 6 * height_in_ctb, 32) >> 3; + mem_reqs[2].memoryBindIndex = ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN; + mem_reqs[2].memoryRequirements.size = size << 6; + mem_reqs[2].memoryRequirements.alignment = 4096; + mem_reqs[2].memoryRequirements.memoryTypeBits = memory_types; + + size = (((vid->vk.max_coded.width + 15) >> 4) * 188 + width_in_ctb * 9 + 1023) >> 9; + mem_reqs[3].memoryBindIndex = ANV_VID_MEM_H265_METADATA_LINE; + mem_reqs[3].memoryRequirements.size = size << 6; + mem_reqs[3].memoryRequirements.alignment = 4096; + mem_reqs[3].memoryRequirements.memoryTypeBits = memory_types; + + size = (((vid->vk.max_coded.width + 15) >> 4) * 172 + 
width_in_ctb * 9 + 1023) >> 9; + mem_reqs[4].memoryBindIndex = ANV_VID_MEM_H265_METADATA_TILE_LINE; + mem_reqs[4].memoryRequirements.size = size << 6; + mem_reqs[4].memoryRequirements.alignment = 4096; + mem_reqs[4].memoryRequirements.memoryTypeBits = memory_types; + + size = (((vid->vk.max_coded.height + 15) >> 4) * 176 + height_in_ctb * 89 + 1023) >> 9; + mem_reqs[5].memoryBindIndex = ANV_VID_MEM_H265_METADATA_TILE_COLUMN; + mem_reqs[5].memoryRequirements.size = size << 6; + mem_reqs[5].memoryRequirements.alignment = 4096; + mem_reqs[5].memoryRequirements.memoryTypeBits = memory_types; + + size = align((vid->vk.max_coded.width >> 1) + width_in_ctb * 3, 16) >> 3; + mem_reqs[6].memoryBindIndex = ANV_VID_MEM_H265_SAO_LINE; + mem_reqs[6].memoryRequirements.size = size << 6; + mem_reqs[6].memoryRequirements.alignment = 4096; + mem_reqs[6].memoryRequirements.memoryTypeBits = memory_types; + + size = align((vid->vk.max_coded.width >> 1) + width_in_ctb * 6, 16) >> 3; + mem_reqs[7].memoryBindIndex = ANV_VID_MEM_H265_SAO_TILE_LINE; + mem_reqs[7].memoryRequirements.size = size << 6; + mem_reqs[7].memoryRequirements.alignment = 4096; + mem_reqs[7].memoryRequirements.memoryTypeBits = memory_types; + + size = align((vid->vk.max_coded.height >> 1) + height_in_ctb * 6, 16) >> 3; + mem_reqs[8].memoryBindIndex = ANV_VID_MEM_H265_SAO_TILE_COLUMN; + mem_reqs[8].memoryRequirements.size = size << 6; + mem_reqs[8].memoryRequirements.alignment = 4096; + mem_reqs[8].memoryRequirements.memoryTypeBits = memory_types; +} + VkResult anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession, @@ -209,6 +289,9 @@ anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: *pVideoSessionMemoryRequirementsCount = ANV_VIDEO_MEM_REQS_H264; break; + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: + *pVideoSessionMemoryRequirementsCount = ANV_VIDEO_MEM_REQS_H265; + break; default: unreachable("unknown codec"); } @@ 
-220,6 +303,9 @@ anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: get_h264_video_session_mem_reqs(vid, mem_reqs, memory_types); break; + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: + get_h265_video_session_mem_reqs(vid, mem_reqs, memory_types); + break; default: unreachable("unknown codec"); } @@ -253,9 +339,9 @@ anv_BindVideoSessionMemoryKHR(VkDevice _device, { ANV_FROM_HANDLE(anv_video_session, vid, videoSession); - assert(bind_mem_count == 4); switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: for (unsigned i = 0; i < bind_mem_count; i++) { copy_bind(&vid->vid_mem[bind_mem[i].memoryBindIndex], &bind_mem[i]); } diff --git a/src/intel/vulkan/genX_video.c b/src/intel/vulkan/genX_video.c index 253fcfeffb3..3d5dba42ae9 100644 --- a/src/intel/vulkan/genX_video.c +++ b/src/intel/vulkan/genX_video.c @@ -42,7 +42,13 @@ void genX(CmdControlVideoCodingKHR)(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) { + flush.VideoPipelineCacheInvalidate = 1; + } + } } void @@ -55,6 +61,746 @@ genX(CmdEndVideoCodingKHR)(VkCommandBuffer commandBuffer, cmd_buffer->video.params = NULL; } +static void +scaling_list(struct anv_cmd_buffer *cmd_buffer, + const StdVideoH265ScalingLists *scaling_list) +{ + /* 4x4, 8x8, 16x16, 32x32 */ + for (uint8_t size = 0; size < 4; size++) { + /* Intra, Inter */ + for (uint8_t pred = 0; pred < 2; pred++) { + /* Y, Cb, Cr */ + for (uint8_t color = 0; color < 3; color++) { + if (size == 3 && color > 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) { + qm.SizeID = size; + qm.PredictionType = pred; + qm.ColorComponent = color; + + qm.DCCoefficient = size > 
1 ? + (size == 2 ? scaling_list->ScalingListDCCoef16x16[3 * pred + color] : + scaling_list->ScalingListDCCoef32x32[pred]) : 0; + + if (size == 0) { + for (uint8_t i = 0; i < 4; i++) + for (uint8_t j = 0; j < 4; j++) + qm.QuantizerMatrix8x8[4 * i + j] = + scaling_list->ScalingList4x4[3 * pred + color][4 * i + j]; + } else if (size == 1) { + for (uint8_t i = 0; i < 8; i++) + for (uint8_t j = 0; j < 8; j++) + qm.QuantizerMatrix8x8[8 * i + j] = + scaling_list->ScalingList8x8[3 * pred + color][8 * i + j]; + } else if (size == 2) { + for (uint8_t i = 0; i < 8; i++) + for (uint8_t j = 0; j < 8; j++) + qm.QuantizerMatrix8x8[8 * i + j] = + scaling_list->ScalingList16x16[3 * pred + color][8 * i + j]; + } else if (size == 3) { + for (uint8_t i = 0; i < 8; i++) + for (uint8_t j = 0; j < 8; j++) + qm.QuantizerMatrix8x8[8 * i + j] = + scaling_list->ScalingList32x32[pred][8 * i + j]; + } + } + } + } + } +} + +static void +anv_h265_decode_video(struct anv_cmd_buffer *cmd_buffer, + const VkVideoDecodeInfoKHR *frame_info) +{ + ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer); + struct anv_video_session *vid = cmd_buffer->video.vid; + struct anv_video_session_params *params = cmd_buffer->video.params; /* session parameters recorded on the command buffer; the SPS/PPS below are looked up in them */ + + const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info = + vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR); + + const StdVideoH265SequenceParameterSet *sps = + vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id); + const StdVideoH265PictureParameterSet *pps = + vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id); + + struct vk_video_h265_reference ref_slots[2][8] = { 0 }; + uint8_t dpb_idx[ANV_VIDEO_H265_MAX_NUM_REF_FRAME] = { 0,}; /* DPB slot index -> position in pReferenceSlots, filled while emitting the reference picture addresses */ + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) { + flush.VideoPipelineCacheInvalidate = 1; + }; + +#if GFX_VER >= 12 + anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) { +
wake.HEVCPowerWellControl = 1; + wake.MaskBits = 768; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) { + cs.PipelineInitialization = true; + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) { + mfx.MFXSyncControlFlag = 1; + } +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_MODE_SELECT), sel) { + sel.CodecSelect = Decode; + sel.CodecStandardSelect = HEVC; + } + +#if GFX_VER >= 12 + anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) { + mfx.MFXSyncControlFlag = 1; + } +#endif + + const struct anv_image_view *iv = + anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); + const struct anv_image *img = iv->image; + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) { + ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1; + ss.SurfaceID = HCP_CurrentDecodedPicture; + ss.SurfaceFormat = PLANAR_420_8; + + ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset / + img->planes[0].primary_surface.isl.row_pitch_B; + +#if GFX_VER >= 11 + ss.DefaultAlphaValue = 0xffff; +#endif + } + +#if GFX_VER >= 12 + /* Seems to need to set same states to ref as decode on gen12 */ + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) { + ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1; + ss.SurfaceID = HCP_ReferencePicture; + ss.SurfaceFormat = PLANAR_420_8; /* must match the HCP_CurrentDecodedPicture surface state emitted just above */
+ + ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset / + img->planes[0].primary_surface.isl.row_pitch_B; + + ss.DefaultAlphaValue = 0xffff; + } +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_BUF_ADDR_STATE), buf) { + buf.DecodedPictureAddress = + anv_image_address(img, &img->planes[0].primary_surface.memory_range); + + buf.DecodedPictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.DecodedPictureAddress.bo, 0), + }; + + buf.DeblockingFilterLineBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].offset + }; + + buf.DeblockingFilterLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterLineBufferAddress.bo, 0), + }; + + buf.DeblockingFilterTileLineBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].offset + }; + + buf.DeblockingFilterTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileLineBufferAddress.bo, 0), + }; + + buf.DeblockingFilterTileColumnBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].offset + }; + + buf.DeblockingFilterTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileColumnBufferAddress.bo, 0), + }; + + buf.MetadataLineBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].offset + }; + +
buf.MetadataLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataLineBufferAddress.bo, 0), + }; + + buf.MetadataTileLineBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].offset + }; + + buf.MetadataTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileLineBufferAddress.bo, 0), + }; + + buf.MetadataTileColumnBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].offset + }; + + buf.MetadataTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileColumnBufferAddress.bo, 0), + }; + + buf.SAOLineBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].offset + }; + + buf.SAOLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.SAOLineBufferAddress.bo, 0), + }; + + buf.SAOTileLineBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].offset + }; + + buf.SAOTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileLineBufferAddress.bo, 0), + }; + + buf.SAOTileColumnBufferAddress = (struct anv_address) { + vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].mem->bo, + vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].offset + }; + + buf.SAOTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileColumnBufferAddress.bo, 0), + }; + + buf.CurrentMVTemporalBufferAddress = 
anv_image_address(img, &img->vid_dmv_top_surface); + + buf.CurrentMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.CurrentMVTemporalBufferAddress.bo, 0), + }; + + for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { + const struct anv_image_view *ref_iv = + anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); + int slot_idx = frame_info->pReferenceSlots[i].slotIndex; + + assert(slot_idx < ANV_VIDEO_H265_MAX_NUM_REF_FRAME); + dpb_idx[slot_idx] = i; + + buf.ReferencePictureAddress[i] = + anv_image_address(ref_iv->image, &ref_iv->image->planes[0].primary_surface.memory_range); + } + + buf.ReferencePictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + buf.OriginalUncompressedPictureSourceMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + buf.StreamOutDataDestinationMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + buf.DecodedPictureStatusBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + buf.LCUILDBStreamOutBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { + const struct anv_image_view *ref_iv = + anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); + + buf.CollocatedMVTemporalBufferAddress[i] = + anv_image_address(ref_iv->image, &ref_iv->image->vid_dmv_top_surface); + } + + buf.CollocatedMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, buf.CollocatedMVTemporalBufferAddress[0].bo, 0), 
+ }; + + buf.VP9ProbabilityBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + buf.VP9SegmentIDBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + buf.VP9HVDLineRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + buf.VP9HVDTileRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; +#if GFX_VER >= 11 + buf.SAOStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.FrameStatisticsStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.SSESourcePixelRowStoreBufferMemoryAddressAttributesReadWrite = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.HCPScalabilitySliceStateBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.HCPScalabilityCABACDecodedSyntaxElementsBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.MVUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.IntraPredictionUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + buf.IntraPredictionLeftReconColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; +#endif + } + + anv_batch_emit(&cmd_buffer->batch, 
GENX(HCP_IND_OBJ_BASE_ADDR_STATE), indirect) { + indirect.HCPIndirectBitstreamObjectBaseAddress = + anv_address_add(src_buffer->address, frame_info->srcBufferOffset & ~4095); + + indirect.HCPIndirectBitstreamObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0), + }; + + indirect.HCPIndirectBitstreamObjectAccessUpperBound = + anv_address_add(src_buffer->address, ALIGN(frame_info->srcBufferRange, 4096)); + + indirect.HCPIndirectCUObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + + indirect.HCPPAKBSEObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + +#if GFX_VER >= 11 + indirect.HCPVP9PAKCompressedHeaderSyntaxStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + indirect.HCPVP9PAKProbabilityCounterStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + indirect.HCPVP9PAKProbabilityDeltasStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + indirect.HCPVP9PAKTileRecordStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; + indirect.HCPVP9PAKCULevelStatisticStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) { + .MOCS = anv_mocs(cmd_buffer->device, NULL, 0), + }; +#endif + } + + if (sps->flags.scaling_list_enabled_flag) { + if (pps->flags.pps_scaling_list_data_present_flag) { + scaling_list(cmd_buffer, pps->pScalingLists); + } else if (sps->flags.sps_scaling_list_data_present_flag) { + scaling_list(cmd_buffer, sps->pScalingLists); + } + } else { + for (uint8_t size = 0; size < 4; size++) { + for (uint8_t pred 
= 0; pred < 2; pred++) { + for (uint8_t color = 0; color < 3; color++) { + + if (size == 3 && color > 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) { + qm.SizeID = size; + qm.PredictionType = pred; + qm.ColorComponent = color; + qm.DCCoefficient = (size > 1) ? 16 : 0; + unsigned len = (size == 0) ? 16 : 64; + + for (uint8_t q = 0; q < len; q++) + qm.QuantizerMatrix8x8[q] = 0x10; + } + } + } + } + } + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIC_STATE), pic) { + pic.FrameWidthInMinimumCodingBlockSize = + sps->pic_width_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) - 1; + pic.FrameHeightInMinimumCodingBlockSize = + sps->pic_height_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) - 1; + + pic.MinCUSize = sps->log2_min_luma_coding_block_size_minus3 & 0x3; + pic.LCUSize = (sps->log2_diff_max_min_luma_coding_block_size + + sps->log2_min_luma_coding_block_size_minus3) & 0x3; + + pic.MinTUSize = sps->log2_min_luma_transform_block_size_minus2 & 0x3; + pic.MaxTUSize = (sps->log2_diff_max_min_luma_transform_block_size + sps->log2_min_luma_transform_block_size_minus2) & 0x3; + pic.MinPCMSize = sps->log2_min_pcm_luma_coding_block_size_minus3 & 0x3; + pic.MaxPCMSize = (sps->log2_diff_max_min_pcm_luma_coding_block_size + sps->log2_min_pcm_luma_coding_block_size_minus3) & 0x3; + +#if GFX_VER >= 11 + pic.Log2SAOOffsetScaleLuma = pps->log2_sao_offset_scale_luma; + pic.Log2SAOOffsetScaleChroma = pps->log2_sao_offset_scale_chroma; + pic.ChromaQPOffsetListLength = pps->chroma_qp_offset_list_len_minus1; + pic.DiffCUChromaQPOffsetDepth = pps->diff_cu_chroma_qp_offset_depth; + pic.ChromaQPOffsetListEnable = pps->flags.chroma_qp_offset_list_enabled_flag; + pic.ChromaSubsampling = sps->chroma_format_idc; + + pic.HighPrecisionOffsetsEnable = sps->flags.high_precision_offsets_enabled_flag; + pic.Log2MaxTransformSkipSize = pps->log2_max_transform_skip_block_size_minus2 + 2; + 
pic.CrossComponentPredictionEnable = pps->flags.cross_component_prediction_enabled_flag; + pic.CABACBypassAlignmentEnable = sps->flags.cabac_bypass_alignment_enabled_flag; + pic.PersistentRiceAdaptationEnable = sps->flags.persistent_rice_adaptation_enabled_flag; + pic.IntraSmoothingDisable = sps->flags.intra_smoothing_disabled_flag; + pic.ExplicitRDPCMEnable = sps->flags.explicit_rdpcm_enabled_flag; + pic.ImplicitRDPCMEnable = sps->flags.implicit_rdpcm_enabled_flag; + pic.TransformSkipContextEnable = sps->flags.transform_skip_context_enabled_flag; + pic.TransformSkipRotationEnable = sps->flags.transform_skip_rotation_enabled_flag; + pic.SPSRangeExtensionEnable = sps->flags.sps_range_extension_flag; +#endif + + pic.CollocatedPictureIsISlice = false; + pic.CurrentPictureIsISlice = false; + pic.SampleAdaptiveOffsetEnable = sps->flags.sample_adaptive_offset_enabled_flag; + pic.PCMEnable = sps->flags.pcm_enabled_flag; + pic.CUQPDeltaEnable = pps->flags.cu_qp_delta_enabled_flag; + pic.MaxDQPDepth = pps->diff_cu_qp_delta_depth; + pic.PCMLoopFilterDisable = sps->flags.pcm_loop_filter_disabled_flag; + pic.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag; + pic.Log2ParallelMergeLevel = pps->log2_parallel_merge_level_minus2; + pic.SignDataHiding = pps->flags.sign_data_hiding_enabled_flag; + pic.LoopFilterEnable = pps->flags.loop_filter_across_tiles_enabled_flag; + pic.EntropyCodingSyncEnable = pps->flags.entropy_coding_sync_enabled_flag; + pic.TilingEnable = pps->flags.tiles_enabled_flag; + pic.WeightedBiPredicationEnable = pps->flags.weighted_bipred_flag; + pic.WeightedPredicationEnable = pps->flags.weighted_pred_flag; + pic.FieldPic = 0; + pic.TopField = true; + pic.TransformSkipEnable = pps->flags.transform_skip_enabled_flag; + pic.AMPEnable = sps->flags.amp_enabled_flag; + pic.TransquantBypassEnable = pps->flags.transquant_bypass_enabled_flag; + pic.StrongIntraSmoothingEnable = sps->flags.strong_intra_smoothing_enabled_flag; + pic.CUPacketStructure = 0; 
+ + pic.PictureCbQPOffset = pps->pps_cb_qp_offset; + pic.PictureCrQPOffset = pps->pps_cr_qp_offset; + pic.IntraMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_intra; + pic.InterMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_inter; + pic.ChromaPCMSampleBitDepth = sps->pcm_sample_bit_depth_chroma_minus1 & 0xf; + pic.LumaPCMSampleBitDepth = sps->pcm_sample_bit_depth_luma_minus1 & 0xf; + + pic.ChromaBitDepth = sps->bit_depth_chroma_minus8; + pic.LumaBitDepth = sps->bit_depth_luma_minus8; + +#if GFX_VER >= 11 + pic.CbQPOffsetList0 = pps->cb_qp_offset_list[0]; + pic.CbQPOffsetList1 = pps->cb_qp_offset_list[1]; + pic.CbQPOffsetList2 = pps->cb_qp_offset_list[2]; + pic.CbQPOffsetList3 = pps->cb_qp_offset_list[3]; + pic.CbQPOffsetList4 = pps->cb_qp_offset_list[4]; + pic.CbQPOffsetList5 = pps->cb_qp_offset_list[5]; + + pic.CrQPOffsetList0 = pps->cr_qp_offset_list[0]; + pic.CrQPOffsetList1 = pps->cr_qp_offset_list[1]; + pic.CrQPOffsetList2 = pps->cr_qp_offset_list[2]; + pic.CrQPOffsetList3 = pps->cr_qp_offset_list[3]; + pic.CrQPOffsetList4 = pps->cr_qp_offset_list[4]; + pic.CrQPOffsetList5 = pps->cr_qp_offset_list[5]; +#endif + } + + if (pps->flags.tiles_enabled_flag) { + int cum = 0; + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_TILE_STATE), tile) { + tile.NumberofTileColumns = pps->num_tile_columns_minus1; + tile.NumberofTileRows = pps->num_tile_rows_minus1; + for (unsigned i = 0; i < 5; i++) { + tile.ColumnPosition[i].CtbPos0i = cum; + if ((4 * i) == pps->num_tile_columns_minus1) + break; + + cum += pps->column_width_minus1[4 * i] + 1; + tile.ColumnPosition[i].CtbPos1i = cum; + + if ((4 * i + 1) == pps->num_tile_columns_minus1) + break; + cum += pps->column_width_minus1[4 * i + 1] + 1; + tile.ColumnPosition[i].CtbPos2i = cum; + + if ((4 * i + 2) == pps->num_tile_columns_minus1) + break; + cum += pps->column_width_minus1[4 * i + 2] + 1; + tile.ColumnPosition[i].CtbPos3i = cum; + + if ((4 * i + 3) == pps->num_tile_columns_minus1) + break; + + 
cum += pps->column_width_minus1[4 * i + 3] + 1; + } + + cum = 0; + + for (unsigned i = 0; i < 5; i++) { + tile.Rowposition[i].CtbPos0i = cum; + if ((4 * i) == pps->num_tile_rows_minus1) + break; + + cum += pps->row_height_minus1[4 * i] + 1; + tile.Rowposition[i].CtbPos1i = cum; + + if ((4 * i + 1) == pps->num_tile_rows_minus1) + break; + cum += pps->row_height_minus1[4 * i + 1] + 1; + tile.Rowposition[i].CtbPos2i = cum; + + if ((4 * i + 2) == pps->num_tile_rows_minus1) + break; + cum += pps->row_height_minus1[4 * i + 2] + 1; + tile.Rowposition[i].CtbPos3i = cum; + + if ((4 * i + 3) == pps->num_tile_rows_minus1) + break; + + cum += pps->row_height_minus1[4 * i + 3] + 1; + } + + if (pps->num_tile_rows_minus1 == 20) { /* 21 rows: the loop above covered rows 0..19, only the start of row 20 is left */ + tile.Rowposition[5].CtbPos0i = cum; + } + if (pps->num_tile_rows_minus1 == 21) { /* 22 rows: program the starts of rows 20 and 21 */ + tile.Rowposition[5].CtbPos0i = cum; + cum += pps->row_height_minus1[20] + 1; + tile.Rowposition[5].CtbPos1i = cum; + } + } + } + + /* Slice parsing */ + uint32_t last_slice = h265_pic_info->sliceSegmentCount - 1; + void *slice_map = anv_gem_mmap(cmd_buffer->device, src_buffer->address.bo, + src_buffer->address.offset, frame_info->srcBufferRange, 0); + + struct vk_video_h265_slice_params slice_params[h265_pic_info->sliceSegmentCount]; + + /* Parse every slice header up front: each HCP_SLICE_STATE emitted below also needs the start address of the following slice. */ + for (unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) { + uint32_t current_offset = h265_pic_info->pSliceSegmentOffsets[s]; + void *map = slice_map + current_offset; + uint32_t slice_size = 0; + + if (s == last_slice) + slice_size = frame_info->srcBufferRange - current_offset; + else + slice_size = h265_pic_info->pSliceSegmentOffsets[s + 1] - current_offset; + + vk_video_parse_h265_slice_header(frame_info, h265_pic_info, sps, pps, map, slice_size, &slice_params[s]); + vk_fill_video_h265_reference_info(frame_info, h265_pic_info, &slice_params[s], ref_slots); + } + + anv_gem_munmap(cmd_buffer->device, slice_map, frame_info->srcBufferRange); + + for
(unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) { + uint32_t ctb_size = 1 << (sps->log2_diff_max_min_luma_coding_block_size + + sps->log2_min_luma_coding_block_size_minus3 + 3); + uint32_t pic_width_in_min_cbs_y = sps->pic_width_in_luma_samples / + (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)); + uint32_t width_in_pix = (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) * + pic_width_in_min_cbs_y; + uint32_t ctb_w = DIV_ROUND_UP(width_in_pix, ctb_size); + bool is_last = (s == last_slice); + int slice_qp = (slice_params[s].slice_qp_delta + pps->init_qp_minus26 + 26) & 0x3f; + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SLICE_STATE), slice) { + slice.SliceHorizontalPosition = slice_params[s].slice_segment_address % ctb_w; + slice.SliceVerticalPosition = slice_params[s].slice_segment_address / ctb_w; + + if (is_last) { + slice.NextSliceHorizontalPosition = 0; + slice.NextSliceVerticalPosition = 0; + } else { + slice.NextSliceHorizontalPosition = (slice_params[s + 1].slice_segment_address) % ctb_w; + slice.NextSliceVerticalPosition = (slice_params[s + 1].slice_segment_address) / ctb_w; + } + + slice.SliceType = slice_params[s].slice_type; + slice.LastSlice = is_last; + slice.DependentSlice = slice_params[s].dependent_slice_segment; + slice.SliceTemporalMVPEnable = slice_params[s].temporal_mvp_enable; + slice.SliceQP = abs(slice_qp); + slice.SliceQPSign = slice_qp >= 0 ? 0 : 1; + slice.SliceCbQPOffset = slice_params[s].slice_cb_qp_offset; + slice.SliceCrQPOffset = slice_params[s].slice_cr_qp_offset; + slice.SliceHeaderDisableDeblockingFilter = pps->flags.deblocking_filter_override_enabled_flag ? 
+ slice_params[s].disable_deblocking_filter_idc : pps->flags.pps_deblocking_filter_disabled_flag; + slice.SliceTCOffsetDiv2 = slice_params[s].tc_offset_div2; + slice.SliceBetaOffsetDiv2 = slice_params[s].beta_offset_div2; + slice.SliceLoopFilterEnable = slice_params[s].loop_filter_across_slices_enable; + slice.SliceSAOChroma = slice_params[s].sao_chroma_flag; + slice.SliceSAOLuma = slice_params[s].sao_luma_flag; + slice.MVDL1Zero = slice_params[s].mvd_l1_zero_flag; + + uint8_t low_delay = true; + + if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_I) { + low_delay = false; + } else { + for (unsigned i = 0; i < slice_params[s].num_ref_idx_l0_active; i++) { + int slot_idx = ref_slots[0][i].slot_index; + + if (vk_video_h265_poc_by_slot(frame_info, slot_idx) > + h265_pic_info->pStdPictureInfo->PicOrderCntVal) { + low_delay = false; + break; + } + } + + for (unsigned i = 0; i < slice_params[s].num_ref_idx_l1_active; i++) { + int slot_idx = ref_slots[1][i].slot_index; + if (vk_video_h265_poc_by_slot(frame_info, slot_idx) > + h265_pic_info->pStdPictureInfo->PicOrderCntVal) { + low_delay = false; + break; + } + } + } + + slice.LowDelay = low_delay; + slice.CollocatedFromL0 = slice_params[s].collocated_list == 0 ? 
true : false; + slice.Log2WeightDenominatorChroma = slice_params[s].luma_log2_weight_denom + + (slice_params[s].chroma_log2_weight_denom - slice_params[s].luma_log2_weight_denom); + slice.Log2WeightDenominatorLuma = slice_params[s].luma_log2_weight_denom; + slice.CABACInit = slice_params[s].cabac_init_idc; + slice.MaxMergeIndex = slice_params[s].max_num_merge_cand - 1; + slice.CollocatedMVTemporalBufferIndex = + dpb_idx[ref_slots[slice_params[s].collocated_list][slice_params[s].collocated_ref_idx].slot_index]; + assert(slice.CollocatedMVTemporalBufferIndex < ANV_VIDEO_H265_HCP_NUM_REF_FRAME); + + slice.SliceHeaderLength = slice_params[s].slice_data_bytes_offset; + slice.CABACZeroWordInsertionEnable = false; + slice.EmulationByteSliceInsertEnable = false; + slice.TailInsertionPresent = false; + slice.SliceDataInsertionPresent = false; + slice.HeaderInsertionPresent = false; + + slice.IndirectPAKBSEDataStartOffset = 0; + slice.TransformSkipLambda = 0; + slice.TransformSkipNumberofNonZeroCoeffsFactor0 = 0; + slice.TransformSkipNumberofZeroCoeffsFactor0 = 0; + slice.TransformSkipNumberofNonZeroCoeffsFactor1 = 0; + slice.TransformSkipNumberofZeroCoeffsFactor1 = 0; + +#if GFX_VER >= 12 + slice.OriginalSliceStartCtbX = slice_params[s].slice_segment_address % ctb_w; + slice.OriginalSliceStartCtbY = slice_params[s].slice_segment_address / ctb_w; +#endif + } + + if (slice_params[s].slice_type != STD_VIDEO_H265_SLICE_TYPE_I) { + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) { + ref.ReferencePictureListSelect = 0; + ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l0_active - 1; + + for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) { + int slot_idx = ref_slots[0][i].slot_index; + unsigned poc = ref_slots[0][i].pic_order_cnt; + int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc; + + assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME); + + ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx]; 
+ ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff; + ref.ReferenceListEntry[i].TopField = true; + } + } + } + + if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) { + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) { + ref.ReferencePictureListSelect = 1; + ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l1_active - 1; + + for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) { + int slot_idx = ref_slots[1][i].slot_index;; + unsigned poc = ref_slots[1][i].pic_order_cnt; + int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc; + + assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME); + + ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx]; + ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff; + ref.ReferenceListEntry[i].TopField = true; + } + } + } + + if ((pps->flags.weighted_pred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_P)) || + (pps->flags.weighted_bipred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B))) { + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) { + w.ReferencePictureListSelect = 0; + + for (unsigned i = 0; i < ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) { + w.LumaOffsets->DeltaLumaWeightLX = slice_params[s].luma_weight_l0[i]; + w.LumaOffsets->LumaOffsetLX = slice_params[s].luma_offset_l0[i]; + w.ChromaOffsets->DeltaChromaWeightLX0 = slice_params[s].chroma_weight_l0[i][0]; + w.ChromaOffsets->DeltaChromaWeightLX1 = slice_params[s].chroma_weight_l0[i][1]; + w.ChromaOffsets->ChromaOffsetLX0 = slice_params[s].chroma_offset_l0[i][0]; + w.ChromaOffsets->ChromaOffsetLX1 = slice_params[s].chroma_offset_l0[i][1]; + } + } + + if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) { + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) { + w.ReferencePictureListSelect = 1; + + for (unsigned i = 0; i < 
ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) { + w.LumaOffsets->DeltaLumaWeightLX = slice_params[s].luma_weight_l1[i]; + w.LumaOffsets->LumaOffsetLX = slice_params[s].luma_offset_l1[i]; + w.ChromaOffsets->DeltaChromaWeightLX0 = slice_params[s].chroma_weight_l1[i][0]; + w.ChromaOffsets->DeltaChromaWeightLX1 = slice_params[s].chroma_weight_l1[i][1]; + w.ChromaOffsets->ChromaOffsetLX0 = slice_params[s].chroma_offset_l1[i][0]; + w.ChromaOffsets->ChromaOffsetLX1 = slice_params[s].chroma_offset_l1[i][1]; + } + } + } + } + + uint32_t buffer_offset = frame_info->srcBufferOffset & 4095; + + anv_batch_emit(&cmd_buffer->batch, GENX(HCP_BSD_OBJECT), bsd) { + bsd.IndirectBSDDataLength = slice_params[s].slice_size - 3; + bsd.IndirectBSDDataStartAddress = buffer_offset + h265_pic_info->pSliceSegmentOffsets[s] + 3; + } + } + +#if GFX_VER >= 12 + anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) { + cs.MemoryImplicitFlush = true; + } +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(VD_PIPELINE_FLUSH), flush) { + flush.HEVCPipelineDone = true; + flush.HEVCPipelineCommandFlush = true; + flush.VDCommandMessageParserDone = true; + } +} + static void anv_h264_decode_video(struct anv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info) @@ -429,10 +1175,14 @@ genX(CmdDecodeVideoKHR)(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + switch (cmd_buffer->video.vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: anv_h264_decode_video(cmd_buffer, frame_info); break; + case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: + anv_h265_decode_video(cmd_buffer, frame_info); + break; default: assert(0); }