diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc.cpp b/src/gallium/drivers/d3d12/d3d12_video_enc.cpp new file mode 100644 index 00000000000..9f962ee8c0f --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_enc.cpp @@ -0,0 +1,1557 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _WIN32 +#include +#endif + +#define D3D12_IGNORE_SDK_LAYERS +#include + +#include "d3d12_util.h" +#include "d3d12_context.h" +#include "d3d12_format.h" +#include "d3d12_resource.h" +#include "d3d12_screen.h" +#include "d3d12_surface.h" +#include "d3d12_video_enc.h" +#include "d3d12_video_enc_h264.h" +#include "d3d12_video_buffer.h" +#include "d3d12_video_texture_array_dpb_manager.h" +#include "d3d12_video_array_of_textures_dpb_manager.h" +#include "d3d12_video_encoder_references_manager_h264.h" +#include "d3d12_residency.h" + +#include "vl/vl_video_buffer.h" +#include "util/format/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_video.h" + +/** + * flush any outstanding command buffers to the hardware + * should be called before a video_buffer is acessed by the gallium frontend again + */ +void +d3d12_video_encoder_flush(struct pipe_video_codec *codec) +{ + struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; + assert(pD3D12Enc); + assert(pD3D12Enc->m_spD3D12VideoDevice); + assert(pD3D12Enc->m_spEncodeCommandQueue); + + // Flush buffer_subdata batch and Wait the m_spEncodeCommandQueue for GPU upload completion + // before recording EncodeFrame below. + struct pipe_fence_handle *completion_fence = NULL; + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush - Flushing pD3D12Enc->base.context and GPU sync between Video/Context queues before flushing Video Encode Queue.\n"); + pD3D12Enc->base.context->flush(pD3D12Enc->base.context, &completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); + assert(completion_fence); + struct d3d12_fence *casted_completion_fence = d3d12_fence(completion_fence); + pD3D12Enc->m_spEncodeCommandQueue->Wait(casted_completion_fence->cmdqueue_fence, casted_completion_fence->value); + pD3D12Enc->m_pD3D12Screen->base.fence_reference(&pD3D12Enc->m_pD3D12Screen->base, &completion_fence, NULL); + + if (!pD3D12Enc->m_needsGPUFlush) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush started. 
Nothing to flush, all up to date.\n"); + } else { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush started. Will flush video queue work and CPU wait " + "on fenceValue: %d\n", + pD3D12Enc->m_fenceValue); + + HRESULT hr = pD3D12Enc->m_pD3D12Screen->dev->GetDeviceRemovedReason(); + if (hr != S_OK) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush" + " - D3D12Device was removed BEFORE commandlist " + "execution with HR %x.\n", + hr); + goto flush_fail; + } + + // Close and execute command list and wait for idle on CPU blocking + // this method before resetting list and allocator for next submission. + + if (pD3D12Enc->m_transitionsBeforeCloseCmdList.size() > 0) { + pD3D12Enc->m_spEncodeCommandList->ResourceBarrier(pD3D12Enc->m_transitionsBeforeCloseCmdList.size(), + pD3D12Enc->m_transitionsBeforeCloseCmdList.data()); + pD3D12Enc->m_transitionsBeforeCloseCmdList.clear(); + } + + hr = pD3D12Enc->m_spEncodeCommandList->Close(); + if (FAILED(hr)) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush - Can't close command list with HR %x\n", hr); + goto flush_fail; + } + + ID3D12CommandList *ppCommandLists[1] = { pD3D12Enc->m_spEncodeCommandList.Get() }; + pD3D12Enc->m_spEncodeCommandQueue->ExecuteCommandLists(1, ppCommandLists); + pD3D12Enc->m_spEncodeCommandQueue->Signal(pD3D12Enc->m_spFence.Get(), pD3D12Enc->m_fenceValue); + pD3D12Enc->m_spFence->SetEventOnCompletion(pD3D12Enc->m_fenceValue, nullptr); + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush - ExecuteCommandLists finished on signal with " + "fenceValue: %d\n", + pD3D12Enc->m_fenceValue); + + hr = pD3D12Enc->m_spCommandAllocator->Reset(); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_encoder] d3d12_video_encoder_flush - resetting ID3D12CommandAllocator failed with HR %x\n", + hr); + goto flush_fail; + } + + hr = pD3D12Enc->m_spEncodeCommandList->Reset(pD3D12Enc->m_spCommandAllocator.Get()); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_encoder] d3d12_video_encoder_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n", + hr); + goto flush_fail; + } + + // Validate device was not removed + hr = pD3D12Enc->m_pD3D12Screen->dev->GetDeviceRemovedReason(); + if (hr != S_OK) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush" + " - D3D12Device was removed AFTER commandlist " + "execution with HR %x, but wasn't before.\n", + hr); + goto flush_fail; + } + + debug_printf( + "[d3d12_video_encoder] d3d12_video_encoder_flush - GPU signaled execution finalized for fenceValue: %d\n", + pD3D12Enc->m_fenceValue); + + pD3D12Enc->m_fenceValue++; + pD3D12Enc->m_needsGPUFlush = false; + } + return; + +flush_fail: + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush failed for fenceValue: %d\n", pD3D12Enc->m_fenceValue); + assert(false); +} + +/** + * Destroys a d3d12_video_encoder + * Call destroy_XX for applicable XX nested member types before deallocating + * Destroy methods should check != nullptr on their input target argument as this method can be called as part of + * cleanup from failure on the creation method + */ +void +d3d12_video_encoder_destroy(struct pipe_video_codec *codec) +{ + if (codec == nullptr) { + return; + } + + d3d12_video_encoder_flush(codec); // Flush pending work before destroying. 
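+   // Note: the flush above closes and executes m_spEncodeCommandList and then blocks on the CPU
+   // until m_spFence reaches m_fenceValue (ID3D12Fence::SetEventOnCompletion with a null event
+   // handle waits synchronously; a non-blocking check would be
+   // pD3D12Enc->m_spFence->GetCompletedValue() >= completedValue), so no in-flight GPU work
+   // references the D3D12 objects released by the destructor below.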
+ + struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; + + // Call d3d12_video_encoder dtor to make ComPtr and other member's destructors work + delete pD3D12Enc; +} + +void +d3d12_video_encoder_update_picparams_tracking(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture) +{ + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA currentPicParams = + d3d12_video_encoder_get_current_picture_param_settings(pD3D12Enc); + + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + bool bUsedAsReference = false; + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + d3d12_video_encoder_update_current_frame_pic_params_info_h264(pD3D12Enc, srcTexture, picture, currentPicParams, bUsedAsReference); + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } + + pD3D12Enc->m_upDPBManager->begin_frame(currentPicParams, bUsedAsReference); +} + +bool +d3d12_video_encoder_reconfigure_encoder_objects(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture) +{ + bool codecChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_codec) != 0); + bool profileChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_profile) != 0); + bool levelChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_level) != 0); + bool codecConfigChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_codec_config) != 0); + bool inputFormatChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_input_format) != 0); + bool resolutionChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_resolution) != 0); + bool rateControlChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_rate_control) != 0); + bool slicesChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_slices) != 0); + bool gopChanged = + ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_gop) != 0); + bool motionPrecisionLimitChanged = ((pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & + d3d12_video_encoder_config_dirty_flag_motion_precision_limit) != 0); + + // Events that that trigger a re-creation of the reference picture manager + // Stores codec agnostic textures so only input format, resolution and gop (num dpb references) affects this + if (!pD3D12Enc->m_upDPBManager + // || codecChanged + // || profileChanged + // || levelChanged + // || codecConfigChanged + || inputFormatChanged || + resolutionChanged + // || rateControlChanged + // || slicesChanged + || gopChanged + // || motionPrecisionLimitChanged + ) { + if (!pD3D12Enc->m_upDPBManager) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_reconfigure_encoder_objects - Creating Reference " + "Pictures Manager for the first time\n"); + } else { + debug_printf("[d3d12_video_encoder] Reconfiguration triggered -> Re-creating Reference Pictures Manager\n"); + } + + D3D12_RESOURCE_FLAGS resourceAllocFlags = + D3D12_RESOURCE_FLAG_VIDEO_ENCODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + bool fArrayOfTextures = 
((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RECONSTRUCTED_FRAMES_REQUIRE_TEXTURE_ARRAYS) == 0); + uint32_t texturePoolSize = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc) + + 1u; // adding an extra slot as we also need to count the current frame output recon + // allocation along max reference frame allocations + assert(texturePoolSize < UINT16_MAX); + if (fArrayOfTextures) { + pD3D12Enc->m_upDPBStorageManager = std::make_unique( + static_cast(texturePoolSize), + pD3D12Enc->m_pD3D12Screen->dev, + pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format, + pD3D12Enc->m_currentEncodeConfig.m_currentResolution, + (D3D12_RESOURCE_FLAG_VIDEO_ENCODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE), + true, // setNullSubresourcesOnAllZero - D3D12 Video Encode expects nullptr pSubresources if AoT, + pD3D12Enc->m_NodeMask, + /*use underlying pool, we can't reuse upper level allocations, need D3D12_RESOURCE_FLAG_VIDEO_ENCODE_REFERENCE_ONLY*/ + true); + } else { + pD3D12Enc->m_upDPBStorageManager = std::make_unique( + static_cast(texturePoolSize), + pD3D12Enc->m_pD3D12Screen->dev, + pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format, + pD3D12Enc->m_currentEncodeConfig.m_currentResolution, + resourceAllocFlags, + pD3D12Enc->m_NodeMask); + } + d3d12_video_encoder_create_reference_picture_manager(pD3D12Enc); + } + + bool reCreatedEncoder = false; + // Events that that trigger a re-creation of the encoder + if (!pD3D12Enc->m_spVideoEncoder || codecChanged || + profileChanged + // || levelChanged // Only affects encoder heap + || codecConfigChanged || + inputFormatChanged + // || resolutionChanged // Only affects encoder heap + // Only re-create if there is NO SUPPORT for reconfiguring rateControl on the fly + || (rateControlChanged && ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_RECONFIGURATION_AVAILABLE) == + 0 /*checking the flag is NOT set*/)) + // Only re-create if there is NO SUPPORT for reconfiguring slices on the fly + || (slicesChanged && ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_SUBREGION_LAYOUT_RECONFIGURATION_AVAILABLE) == + 0 /*checking the flag is NOT set*/)) + // Only re-create if there is NO SUPPORT for reconfiguring gop on the fly + || (gopChanged && ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_SEQUENCE_GOP_RECONFIGURATION_AVAILABLE) == + 0 /*checking the flag is NOT set*/)) || + motionPrecisionLimitChanged) { + if (!pD3D12Enc->m_spVideoEncoder) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_reconfigure_encoder_objects - Creating " + "D3D12VideoEncoder for the first time\n"); + } else { + debug_printf("[d3d12_video_encoder] Reconfiguration triggered -> Re-creating D3D12VideoEncoder\n"); + reCreatedEncoder = true; + } + + D3D12_VIDEO_ENCODER_DESC encoderDesc = { pD3D12Enc->m_NodeMask, + D3D12_VIDEO_ENCODER_FLAG_NONE, + pD3D12Enc->m_currentEncodeConfig.m_encoderCodecDesc, + d3d12_video_encoder_get_current_profile_desc(pD3D12Enc), + pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format, + d3d12_video_encoder_get_current_codec_config_desc(pD3D12Enc), + pD3D12Enc->m_currentEncodeConfig.m_encoderMotionPrecisionLimit }; + + // Create encoder + HRESULT hr = pD3D12Enc->m_spD3D12VideoDevice->CreateVideoEncoder(&encoderDesc, + IID_PPV_ARGS(pD3D12Enc->m_spVideoEncoder.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("CreateVideoEncoder 
failed with HR %x\n", hr); + return false; + } + } + + bool reCreatedEncoderHeap = false; + // Events that that trigger a re-creation of the encoder heap + if (!pD3D12Enc->m_spVideoEncoderHeap || codecChanged || profileChanged || + levelChanged + // || codecConfigChanged // Only affects encoder + || inputFormatChanged // Might affect internal textures in the heap + || resolutionChanged + // Only re-create if there is NO SUPPORT for reconfiguring rateControl on the fly + || (rateControlChanged && ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_RECONFIGURATION_AVAILABLE) == + 0 /*checking the flag is NOT set*/)) + // Only re-create if there is NO SUPPORT for reconfiguring slices on the fly + || (slicesChanged && ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_SUBREGION_LAYOUT_RECONFIGURATION_AVAILABLE) == + 0 /*checking the flag is NOT set*/)) + // Only re-create if there is NO SUPPORT for reconfiguring gop on the fly + || (gopChanged && ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_SEQUENCE_GOP_RECONFIGURATION_AVAILABLE) == + 0 /*checking the flag is NOT set*/)) + // || motionPrecisionLimitChanged // Only affects encoder + ) { + if (!pD3D12Enc->m_spVideoEncoderHeap) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_reconfigure_encoder_objects - Creating " + "D3D12VideoEncoderHeap for the first time\n"); + } else { + debug_printf("[d3d12_video_encoder] Reconfiguration triggered -> Re-creating D3D12VideoEncoderHeap\n"); + reCreatedEncoderHeap = true; + } + + D3D12_VIDEO_ENCODER_HEAP_DESC heapDesc = { pD3D12Enc->m_NodeMask, + D3D12_VIDEO_ENCODER_HEAP_FLAG_NONE, + pD3D12Enc->m_currentEncodeConfig.m_encoderCodecDesc, + d3d12_video_encoder_get_current_profile_desc(pD3D12Enc), + d3d12_video_encoder_get_current_level_desc(pD3D12Enc), + // resolution list count + 1, + // resolution list + &pD3D12Enc->m_currentEncodeConfig.m_currentResolution }; + + // Create encoder heap + HRESULT hr = pD3D12Enc->m_spD3D12VideoDevice->CreateVideoEncoderHeap(&heapDesc, + IID_PPV_ARGS(pD3D12Enc->m_spVideoEncoderHeap.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("CreateVideoEncoderHeap failed with HR %x\n", hr); + return false; + } + } + + // If on-the-fly reconfiguration happened without object recreation, set + // D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAG_*_CHANGED reconfiguration flags in EncodeFrame + if (rateControlChanged && + ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_RECONFIGURATION_AVAILABLE) != + 0 /*checking if the flag it's actually set*/) && + (pD3D12Enc->m_fenceValue > 1) && (!reCreatedEncoder || !reCreatedEncoderHeap)) { + pD3D12Enc->m_currentEncodeConfig.m_seqFlags |= D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAG_RATE_CONTROL_CHANGE; + } + + if (slicesChanged && + ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_SUBREGION_LAYOUT_RECONFIGURATION_AVAILABLE) != + 0 /*checking if the flag it's actually set*/) && + (pD3D12Enc->m_fenceValue > 1) && (!reCreatedEncoder || !reCreatedEncoderHeap)) { + pD3D12Enc->m_currentEncodeConfig.m_seqFlags |= D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAG_SUBREGION_LAYOUT_CHANGE; + } + + if (gopChanged && + ((pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags & + D3D12_VIDEO_ENCODER_SUPPORT_FLAG_SEQUENCE_GOP_RECONFIGURATION_AVAILABLE) != + 0 /*checking if the flag it's actually set*/) && + 
(pD3D12Enc->m_fenceValue > 1) && (!reCreatedEncoder || !reCreatedEncoderHeap)) { + pD3D12Enc->m_currentEncodeConfig.m_seqFlags |= D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAG_GOP_SEQUENCE_CHANGE; + } + return true; +} + +void +d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + bool gopHasPFrames = + (pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures.PPicturePeriod > 0) && + ((pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures.GOPLength == 0) || + (pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures.PPicturePeriod < + pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures.GOPLength)); + + pD3D12Enc->m_upDPBManager = std::make_unique( + gopHasPFrames, + *pD3D12Enc->m_upDPBStorageManager, + // Max number of frames to be used as a reference, without counting the current recon picture + d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc) + ); + + pD3D12Enc->m_upBitstreamBuilder = std::make_unique(); + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA +d3d12_video_encoder_get_current_slice_param_settings(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA subregionData = {}; + if (pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode != + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME) { + subregionData.pSlicesPartition_H264 = + &pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigDesc.m_SlicesPartition_H264; + subregionData.DataSize = sizeof(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES); + } + return subregionData; + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA +d3d12_video_encoder_get_current_picture_param_settings(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA curPicParamsData = {}; + curPicParamsData.pH264PicData = &pD3D12Enc->m_currentEncodeConfig.m_encoderPicParamsDesc.m_H264PicData; + curPicParamsData.DataSize = sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderPicParamsDesc.m_H264PicData); + return curPicParamsData; + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +D3D12_VIDEO_ENCODER_RATE_CONTROL +d3d12_video_encoder_get_current_rate_control_settings(struct d3d12_video_encoder *pD3D12Enc) +{ + D3D12_VIDEO_ENCODER_RATE_CONTROL curRateControlDesc = {}; + curRateControlDesc.Mode = pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Mode; + curRateControlDesc.Flags = pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags; + curRateControlDesc.TargetFrameRate = pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_FrameRate; + + switch (pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Mode) { + case D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_ABSOLUTE_QP_MAP: + { + curRateControlDesc.ConfigParams.pConfiguration_CQP = 
nullptr; + curRateControlDesc.ConfigParams.DataSize = 0; + } break; + case D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_CQP: + { + curRateControlDesc.ConfigParams.pConfiguration_CQP = + &pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP; + curRateControlDesc.ConfigParams.DataSize = + sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP); + } break; + case D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_CBR: + { + curRateControlDesc.ConfigParams.pConfiguration_CBR = + &pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR; + curRateControlDesc.ConfigParams.DataSize = + sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR); + } break; + case D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_VBR: + { + curRateControlDesc.ConfigParams.pConfiguration_VBR = + &pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_VBR; + curRateControlDesc.ConfigParams.DataSize = + sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_VBR); + } break; + case D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_QVBR: + { + curRateControlDesc.ConfigParams.pConfiguration_QVBR = + &pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_QVBR; + curRateControlDesc.ConfigParams.DataSize = + sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_QVBR); + } break; + default: + { + unreachable("Unsupported D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE"); + } break; + } + + return curRateControlDesc; +} + +D3D12_VIDEO_ENCODER_LEVEL_SETTING +d3d12_video_encoder_get_current_level_desc(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + D3D12_VIDEO_ENCODER_LEVEL_SETTING curLevelDesc = {}; + curLevelDesc.pH264LevelSetting = &pD3D12Enc->m_currentEncodeConfig.m_encoderLevelDesc.m_H264LevelSetting; + curLevelDesc.DataSize = sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderLevelDesc.m_H264LevelSetting); + return curLevelDesc; + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +uint32_t +d3d12_video_encoder_build_codec_headers(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + return d3d12_video_encoder_build_codec_headers_h264(pD3D12Enc); + + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } + return 0u; +} + +D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE +d3d12_video_encoder_get_current_gop_desc(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE curGOPDesc = {}; + curGOPDesc.pH264GroupOfPictures = + &pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures; + curGOPDesc.DataSize = sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures); + return curGOPDesc; + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION +d3d12_video_encoder_get_current_codec_config_desc(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = 
u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION codecConfigDesc = {}; + codecConfigDesc.pH264Config = &pD3D12Enc->m_currentEncodeConfig.m_encoderCodecSpecificConfigDesc.m_H264Config; + codecConfigDesc.DataSize = + sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderCodecSpecificConfigDesc.m_H264Config); + return codecConfigDesc; + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +D3D12_VIDEO_ENCODER_CODEC +d3d12_video_encoder_get_current_codec(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + return D3D12_VIDEO_ENCODER_CODEC_H264; + } break; + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +/// +/// Call d3d12_video_encoder_query_d3d12_driver_caps and see if any optional feature requested +/// is not supported, disable it, query again until finding a negotiated cap/feature set +/// Note that with fallbacks, the upper layer will not get exactly the encoding seetings they requested +/// but for very particular settings it's better to continue with warnings than failing the whole encoding process +/// +bool d3d12_video_encoder_negotiate_requested_features_and_d3d12_driver_caps(struct d3d12_video_encoder *pD3D12Enc, D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT &capEncoderSupportData) { + + /// + /// Check for general support + /// Check for validation errors (some drivers return general support but also validation errors anyways, work around for those unexpected cases) + /// + + bool configSupported = d3d12_video_encoder_query_d3d12_driver_caps(pD3D12Enc, /*inout*/ capEncoderSupportData) + && (((capEncoderSupportData.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_GENERAL_SUPPORT_OK) != 0) + && (capEncoderSupportData.ValidationFlags == D3D12_VIDEO_ENCODER_VALIDATION_FLAG_NONE)); + + /// + /// If rate control config is not supported, try falling back and check for caps again + /// + + if ((capEncoderSupportData.ValidationFlags & (D3D12_VIDEO_ENCODER_VALIDATION_FLAG_RATE_CONTROL_CONFIGURATION_NOT_SUPPORTED | D3D12_VIDEO_ENCODER_VALIDATION_FLAG_RATE_CONTROL_MODE_NOT_SUPPORTED)) != 0) { + + if (D3D12_VIDEO_ENC_FALLBACK_RATE_CONTROL_CONFIG){ // Check if fallback mode is enabled, or we should just fail without support + + debug_printf("[d3d12_video_encoder] WARNING: Requested rate control is not supported, trying fallback to unsetting optional features\n"); + + bool isRequestingVBVSizesSupported = ((capEncoderSupportData.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_VBV_SIZE_CONFIG_AVAILABLE) != 0); + bool isClientRequestingVBVSizes = ((pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags & D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_VBV_SIZES) != 0); + + if(isClientRequestingVBVSizes && !isRequestingVBVSizesSupported) { + debug_printf("[d3d12_video_encoder] WARNING: Requested D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_VBV_SIZES with VBVCapacity (bits): %ld and InitialVBVFullness (bits) %ld is not supported, will continue encoding unsetting this feature as fallback.\n", + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.VBVCapacity, + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.InitialVBVFullness); + + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags &= 
~D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_VBV_SIZES; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.VBVCapacity = 0; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.InitialVBVFullness = 0; + } + + bool isRequestingPeakFrameSizeSupported = ((capEncoderSupportData.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_MAX_FRAME_SIZE_AVAILABLE) != 0); + bool isClientRequestingPeakFrameSize = ((pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags & D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE) != 0); + + if(isClientRequestingPeakFrameSize && !isRequestingPeakFrameSizeSupported) { + debug_printf("[d3d12_video_encoder] WARNING: Requested D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE with MaxFrameBitSize %ld but the feature is not supported, will continue encoding unsetting this feature as fallback.\n", + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_VBR.MaxFrameBitSize); + + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags &= ~D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_VBR.MaxFrameBitSize = 0; + } + + /// + /// Try fallback configuration + /// + configSupported = d3d12_video_encoder_query_d3d12_driver_caps(pD3D12Enc, /*inout*/ capEncoderSupportData) + && (((capEncoderSupportData.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_GENERAL_SUPPORT_OK) != 0) + && (capEncoderSupportData.ValidationFlags == D3D12_VIDEO_ENCODER_VALIDATION_FLAG_NONE)); + + } else { + debug_printf("[d3d12_video_encoder] WARNING: Requested rate control is not supported. To continue with a fallback, must enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_RATE_CONTROL_CONFIG\n"); + } + } + + if(!configSupported) { + debug_printf("[d3d12_video_encoder] Cap negotiation failed, see more details below:\n"); + + if ((capEncoderSupportData.ValidationFlags & D3D12_VIDEO_ENCODER_VALIDATION_FLAG_CODEC_NOT_SUPPORTED) != 0) { + debug_printf("[d3d12_video_encoder] Requested codec is not supported\n"); + } + + if ((capEncoderSupportData.ValidationFlags & + D3D12_VIDEO_ENCODER_VALIDATION_FLAG_RESOLUTION_NOT_SUPPORTED_IN_LIST) != 0) { + debug_printf("[d3d12_video_encoder] Requested resolution is not supported\n"); + } + + if ((capEncoderSupportData.ValidationFlags & + D3D12_VIDEO_ENCODER_VALIDATION_FLAG_RATE_CONTROL_CONFIGURATION_NOT_SUPPORTED) != 0) { + debug_printf("[d3d12_video_encoder] Requested bitrate or rc config is not supported\n"); + } + + if ((capEncoderSupportData.ValidationFlags & + D3D12_VIDEO_ENCODER_VALIDATION_FLAG_CODEC_CONFIGURATION_NOT_SUPPORTED) != 0) { + debug_printf("[d3d12_video_encoder] Requested codec config is not supported\n"); + } + + if ((capEncoderSupportData.ValidationFlags & + D3D12_VIDEO_ENCODER_VALIDATION_FLAG_RATE_CONTROL_MODE_NOT_SUPPORTED) != 0) { + debug_printf("[d3d12_video_encoder] Requested rate control mode is not supported\n"); + } + + if ((capEncoderSupportData.ValidationFlags & + D3D12_VIDEO_ENCODER_VALIDATION_FLAG_INTRA_REFRESH_MODE_NOT_SUPPORTED) != 0) { + debug_printf("[d3d12_video_encoder] Requested intra refresh config is not supported\n"); + } + + if ((capEncoderSupportData.ValidationFlags & + D3D12_VIDEO_ENCODER_VALIDATION_FLAG_SUBREGION_LAYOUT_MODE_NOT_SUPPORTED) != 0) { + debug_printf("[d3d12_video_encoder] Requested subregion layout mode is not supported\n"); + } + + if 
((capEncoderSupportData.ValidationFlags & D3D12_VIDEO_ENCODER_VALIDATION_FLAG_INPUT_FORMAT_NOT_SUPPORTED) != + 0) { + debug_printf("[d3d12_video_encoder] Requested input dxgi format is not supported\n"); + } + } + + return configSupported; +} + +bool d3d12_video_encoder_query_d3d12_driver_caps(struct d3d12_video_encoder *pD3D12Enc, D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT &capEncoderSupportData) { + capEncoderSupportData.NodeIndex = pD3D12Enc->m_NodeIndex; + capEncoderSupportData.Codec = d3d12_video_encoder_get_current_codec(pD3D12Enc); + capEncoderSupportData.InputFormat = pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format; + capEncoderSupportData.RateControl = d3d12_video_encoder_get_current_rate_control_settings(pD3D12Enc); + capEncoderSupportData.IntraRefresh = pD3D12Enc->m_currentEncodeConfig.m_IntraRefresh.Mode; + capEncoderSupportData.SubregionFrameEncoding = pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode; + capEncoderSupportData.ResolutionsListCount = 1; + capEncoderSupportData.pResolutionList = &pD3D12Enc->m_currentEncodeConfig.m_currentResolution; + capEncoderSupportData.CodecGopSequence = d3d12_video_encoder_get_current_gop_desc(pD3D12Enc); + capEncoderSupportData.MaxReferenceFramesInDPB = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc); + capEncoderSupportData.CodecConfiguration = d3d12_video_encoder_get_current_codec_config_desc(pD3D12Enc); + + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + capEncoderSupportData.SuggestedProfile.pH264Profile = + &pD3D12Enc->m_currentEncodeCapabilities.m_encoderSuggestedProfileDesc.m_H264Profile; + capEncoderSupportData.SuggestedProfile.DataSize = + sizeof(pD3D12Enc->m_currentEncodeCapabilities.m_encoderSuggestedProfileDesc.m_H264Profile); + capEncoderSupportData.SuggestedLevel.pH264LevelSetting = + &pD3D12Enc->m_currentEncodeCapabilities.m_encoderLevelSuggestedDesc.m_H264LevelSetting; + capEncoderSupportData.SuggestedLevel.DataSize = + sizeof(pD3D12Enc->m_currentEncodeCapabilities.m_encoderLevelSuggestedDesc.m_H264LevelSetting); + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } + + // prepare inout storage for the resolution dependent result. 
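+   // CheckFeatureSupport below fills m_currentResolutionSupportCaps in place; the cached,
+   // resolution-dependent limits (e.g. MaxSubregionsNumber and SubregionBlockPixelsSize) presumably
+   // feed d3d12_video_encoder_calculate_max_slices_count_in_output when the resolved metadata
+   // buffer is sized later on.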
+ capEncoderSupportData.pResolutionDependentSupport = + &pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps; + + HRESULT hr = pD3D12Enc->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_ENCODER_SUPPORT, + &capEncoderSupportData, + sizeof(capEncoderSupportData)); + if (FAILED(hr)) { + debug_printf("CheckFeatureSupport failed with HR %x\n", hr); + return false; + } + pD3D12Enc->m_currentEncodeCapabilities.m_SupportFlags = capEncoderSupportData.SupportFlags; + pD3D12Enc->m_currentEncodeCapabilities.m_ValidationFlags = capEncoderSupportData.ValidationFlags; + return true; +} + +bool d3d12_video_encoder_check_subregion_mode_support(struct d3d12_video_encoder *pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE requestedSlicesMode + ) +{ + D3D12_FEATURE_DATA_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE capDataSubregionLayout = { }; + capDataSubregionLayout.NodeIndex = pD3D12Enc->m_NodeIndex; + capDataSubregionLayout.Codec = d3d12_video_encoder_get_current_codec(pD3D12Enc); + capDataSubregionLayout.Profile = d3d12_video_encoder_get_current_profile_desc(pD3D12Enc); + capDataSubregionLayout.Level = d3d12_video_encoder_get_current_level_desc(pD3D12Enc); + capDataSubregionLayout.SubregionMode = requestedSlicesMode; + HRESULT hr = pD3D12Enc->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE, &capDataSubregionLayout, sizeof(capDataSubregionLayout)); + if (FAILED(hr)) { + debug_printf("CheckFeatureSupport failed with HR %x\n", hr); + return false; + } + return capDataSubregionLayout.IsSupported; +} + +D3D12_VIDEO_ENCODER_PROFILE_DESC +d3d12_video_encoder_get_current_profile_desc(struct d3d12_video_encoder *pD3D12Enc) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + D3D12_VIDEO_ENCODER_PROFILE_DESC curProfDesc = {}; + curProfDesc.pH264Profile = &pD3D12Enc->m_currentEncodeConfig.m_encoderProfileDesc.m_H264Profile; + curProfDesc.DataSize = sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderProfileDesc.m_H264Profile); + return curProfDesc; + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +uint32_t +d3d12_video_encoder_get_current_max_dpb_capacity(struct d3d12_video_encoder *pD3D12Enc) +{ + return pD3D12Enc->base.max_references; +} + +bool +d3d12_video_encoder_update_current_encoder_config_state(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture) +{ + enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + return d3d12_video_encoder_update_current_encoder_config_state_h264(pD3D12Enc, srcTexture, picture); + } break; + + default: + { + unreachable("Unsupported pipe_video_format"); + } break; + } +} + +bool +d3d12_video_encoder_create_command_objects(struct d3d12_video_encoder *pD3D12Enc) +{ + assert(pD3D12Enc->m_spD3D12VideoDevice); + + D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE }; + HRESULT hr = pD3D12Enc->m_pD3D12Screen->dev->CreateCommandQueue( + &commandQueueDesc, + IID_PPV_ARGS(pD3D12Enc->m_spEncodeCommandQueue.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_create_command_objects - Call to CreateCommandQueue " + "failed with HR %x\n", + hr); + return false; + } + + hr = pD3D12Enc->m_pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, 
IID_PPV_ARGS(&pD3D12Enc->m_spFence)); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_encoder] d3d12_video_encoder_create_command_objects - Call to CreateFence failed with HR %x\n", + hr); + return false; + } + + hr = pD3D12Enc->m_pD3D12Screen->dev->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE, + IID_PPV_ARGS(pD3D12Enc->m_spCommandAllocator.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_create_command_objects - Call to " + "CreateCommandAllocator failed with HR %x\n", + hr); + return false; + } + + hr = + pD3D12Enc->m_pD3D12Screen->dev->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE, + pD3D12Enc->m_spCommandAllocator.Get(), + nullptr, + IID_PPV_ARGS(pD3D12Enc->m_spEncodeCommandList.GetAddressOf())); + + if (FAILED(hr)) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_create_command_objects - Call to CreateCommandList " + "failed with HR %x\n", + hr); + return false; + } + + D3D12_COMMAND_QUEUE_DESC copyQueueDesc = { D3D12_COMMAND_LIST_TYPE_COPY }; + hr = pD3D12Enc->m_pD3D12Screen->dev->CreateCommandQueue(©QueueDesc, + IID_PPV_ARGS(pD3D12Enc->m_spCopyQueue.GetAddressOf())); + + if (FAILED(hr)) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_create_command_objects - Call to CreateCommandQueue " + "failed with HR %x\n", + hr); + return false; + } + + return true; +} + +struct pipe_video_codec * +d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pipe_video_codec *codec) +{ + /// + /// Initialize d3d12_video_encoder + /// + + // Not using new doesn't call ctor and the initializations in the class declaration are lost + struct d3d12_video_encoder *pD3D12Enc = new d3d12_video_encoder; + + pD3D12Enc->base = *codec; + pD3D12Enc->m_screen = context->screen; + pD3D12Enc->base.context = context; + pD3D12Enc->base.width = codec->width; + pD3D12Enc->base.height = codec->height; + pD3D12Enc->base.max_references = codec->max_references; + // Only fill methods that are supported by the d3d12 encoder, leaving null the rest (ie. 
encode_* / encode_macroblock) + pD3D12Enc->base.destroy = d3d12_video_encoder_destroy; + pD3D12Enc->base.begin_frame = d3d12_video_encoder_begin_frame; + pD3D12Enc->base.encode_bitstream = d3d12_video_encoder_encode_bitstream; + pD3D12Enc->base.end_frame = d3d12_video_encoder_end_frame; + pD3D12Enc->base.flush = d3d12_video_encoder_flush; + pD3D12Enc->base.get_feedback = d3d12_video_encoder_get_feedback; + + struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context; + pD3D12Enc->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen); + + if (FAILED(pD3D12Enc->m_pD3D12Screen->dev->QueryInterface( + IID_PPV_ARGS(pD3D12Enc->m_spD3D12VideoDevice.GetAddressOf())))) { + debug_printf( + "[d3d12_video_encoder] d3d12_video_encoder_create_encoder - D3D12 Device has no Video encode support\n"); + goto failed; + } + + if (!d3d12_video_encoder_create_command_objects(pD3D12Enc)) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_create_encoder - Failure on " + "d3d12_video_encoder_create_command_objects\n"); + goto failed; + } + + return &pD3D12Enc->base; + +failed: + if (pD3D12Enc != nullptr) { + d3d12_video_encoder_destroy((struct pipe_video_codec *) pD3D12Enc); + } + + return nullptr; +} + +bool +d3d12_video_encoder_prepare_output_buffers(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture) +{ + pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.NodeIndex = pD3D12Enc->m_NodeIndex; + pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.Codec = + pD3D12Enc->m_currentEncodeConfig.m_encoderCodecDesc; + pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.Profile = + d3d12_video_encoder_get_current_profile_desc(pD3D12Enc); + pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.InputFormat = + pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format; + pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.PictureTargetResolution = + pD3D12Enc->m_currentEncodeConfig.m_currentResolution; + + HRESULT hr = pD3D12Enc->m_spD3D12VideoDevice->CheckFeatureSupport( + D3D12_FEATURE_VIDEO_ENCODER_RESOURCE_REQUIREMENTS, + &pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps, + sizeof(pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps)); + + if (FAILED(hr)) { + debug_printf("CheckFeatureSupport failed with HR %x\n", hr); + return false; + } + + if (!pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.IsSupported) { + debug_printf("[d3d12_video_encoder] D3D12_FEATURE_VIDEO_ENCODER_RESOURCE_REQUIREMENTS arguments are not supported.\n"); + return false; + } + + d3d12_video_encoder_calculate_metadata_resolved_buffer_size( + pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput, + pD3D12Enc->m_currentEncodeCapabilities.m_resolvedLayoutMetadataBufferRequiredSize); + + D3D12_HEAP_PROPERTIES Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + if ((pD3D12Enc->m_spResolvedMetadataBuffer == nullptr) || + (pD3D12Enc->m_spResolvedMetadataBuffer->GetDesc().Width < + pD3D12Enc->m_currentEncodeCapabilities.m_resolvedLayoutMetadataBufferRequiredSize)) { + CD3DX12_RESOURCE_DESC resolvedMetadataBufferDesc = CD3DX12_RESOURCE_DESC::Buffer( + pD3D12Enc->m_currentEncodeCapabilities.m_resolvedLayoutMetadataBufferRequiredSize); + + HRESULT hr = pD3D12Enc->m_pD3D12Screen->dev->CreateCommittedResource( + &Properties, + D3D12_HEAP_FLAG_NONE, + &resolvedMetadataBufferDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + 
IID_PPV_ARGS(pD3D12Enc->m_spResolvedMetadataBuffer.GetAddressOf())); + + if (FAILED(hr)) { + debug_printf("CreateCommittedResource failed with HR %x\n", hr); + return false; + } + } + + if ((pD3D12Enc->m_spMetadataOutputBuffer == nullptr) || + (pD3D12Enc->m_spMetadataOutputBuffer->GetDesc().Width < + pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.MaxEncoderOutputMetadataBufferSize)) { + CD3DX12_RESOURCE_DESC metadataBufferDesc = CD3DX12_RESOURCE_DESC::Buffer( + pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.MaxEncoderOutputMetadataBufferSize); + + HRESULT hr = pD3D12Enc->m_pD3D12Screen->dev->CreateCommittedResource( + &Properties, + D3D12_HEAP_FLAG_NONE, + &metadataBufferDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(pD3D12Enc->m_spMetadataOutputBuffer.GetAddressOf())); + + if (FAILED(hr)) { + debug_printf("CreateCommittedResource failed with HR %x\n", hr); + return false; + } + } + return true; +} + +bool +d3d12_video_encoder_reconfigure_session(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture) +{ + assert(pD3D12Enc->m_spD3D12VideoDevice); + if(!d3d12_video_encoder_update_current_encoder_config_state(pD3D12Enc, srcTexture, picture)) { + debug_printf("d3d12_video_encoder_update_current_encoder_config_state failed!\n"); + return false; + } + if(!d3d12_video_encoder_reconfigure_encoder_objects(pD3D12Enc, srcTexture, picture)) { + debug_printf("d3d12_video_encoder_reconfigure_encoder_objects failed!\n"); + return false; + } + d3d12_video_encoder_update_picparams_tracking(pD3D12Enc, srcTexture, picture); + if(!d3d12_video_encoder_prepare_output_buffers(pD3D12Enc, srcTexture, picture)) { + debug_printf("d3d12_video_encoder_prepare_output_buffers failed!\n"); + return false; + } + return true; +} + +/** + * start encoding of a new frame + */ +void +d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + // Do nothing here. Initialize happens on encoder creation, re-config (if any) happens in + // d3d12_video_encoder_encode_bitstream + struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; + assert(pD3D12Enc); + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame started for fenceValue: %d\n", + pD3D12Enc->m_fenceValue); + + if (!d3d12_video_encoder_reconfigure_session(pD3D12Enc, target, picture)) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame - Failure on " + "d3d12_video_encoder_reconfigure_session\n"); + goto fail; + } + + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame finalized for fenceValue: %d\n", + pD3D12Enc->m_fenceValue); + return; + +fail: + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame failed for fenceValue: %d\n", + pD3D12Enc->m_fenceValue); + assert(false); +} + +void +d3d12_video_encoder_calculate_metadata_resolved_buffer_size(uint32_t maxSliceNumber, size_t &bufferSize) +{ + bufferSize = sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA) + + (maxSliceNumber * sizeof(D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA)); +} + +// Returns the number of slices that the output will contain for fixed slicing modes +// and the maximum number of slices the output might contain for dynamic slicing modes (eg. 
max bytes per slice) +uint32_t +d3d12_video_encoder_calculate_max_slices_count_in_output( + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE slicesMode, + const D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES *slicesConfig, + uint32_t MaxSubregionsNumberFromCaps, + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC sequenceTargetResolution, + uint32_t SubregionBlockPixelsSize) +{ + uint32_t pic_width_in_subregion_units = + static_cast(std::ceil(sequenceTargetResolution.Width / static_cast(SubregionBlockPixelsSize))); + uint32_t pic_height_in_subregion_units = + static_cast(std::ceil(sequenceTargetResolution.Height / static_cast(SubregionBlockPixelsSize))); + uint32_t total_picture_subregion_units = pic_width_in_subregion_units * pic_height_in_subregion_units; + uint32_t maxSlices = 0u; + switch (slicesMode) { + case D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME: + { + maxSlices = 1u; + } break; + case D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION: + { + maxSlices = MaxSubregionsNumberFromCaps; + } break; + case D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED: + { + maxSlices = static_cast( + std::ceil(total_picture_subregion_units / static_cast(slicesConfig->NumberOfCodingUnitsPerSlice))); + } break; + case D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_ROWS_PER_SUBREGION: + { + maxSlices = static_cast( + std::ceil(pic_height_in_subregion_units / static_cast(slicesConfig->NumberOfRowsPerSlice))); + } break; + case D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME: + { + maxSlices = slicesConfig->NumberOfSlicesPerFrame; + } break; + default: + { + unreachable("Unsupported D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE"); + } break; + } + + return maxSlices; +} + +/** + * encode a bitstream + */ +void +d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, + struct pipe_video_buffer *source, + struct pipe_resource * destination, + void ** feedback) +{ + struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; + assert(pD3D12Enc); + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_encode_bitstream started for fenceValue: %d\n", + pD3D12Enc->m_fenceValue); + assert(pD3D12Enc->m_spD3D12VideoDevice); + assert(pD3D12Enc->m_spEncodeCommandQueue); + assert(pD3D12Enc->m_pD3D12Screen); + + struct d3d12_video_buffer *pInputVideoBuffer = (struct d3d12_video_buffer *) source; + assert(pInputVideoBuffer); + ID3D12Resource *pInputVideoD3D12Res = d3d12_resource_resource(pInputVideoBuffer->texture); + uint32_t inputVideoD3D12Subresource = 0u; + + struct d3d12_resource *pOutputBitstreamBuffer = (struct d3d12_resource *) destination; + assert(pOutputBitstreamBuffer); + ID3D12Resource *pOutputBufferD3D12Res = d3d12_resource_resource(pOutputBitstreamBuffer); + + // Make them permanently resident for video use + d3d12_promote_to_permanent_residency(pD3D12Enc->m_pD3D12Screen, pOutputBitstreamBuffer); + d3d12_promote_to_permanent_residency(pD3D12Enc->m_pD3D12Screen, pInputVideoBuffer->texture); + + /// + /// Record Encode operation + /// + + /// + /// pInputVideoD3D12Res and pOutputBufferD3D12Res are unwrapped from pipe_resource objects that are passed externally + /// and could be tracked by pipe_context and have pending ops. Flush any work on them and transition to + /// D3D12_RESOURCE_STATE_COMMON before issuing work in Video command queue below. 
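+   /// (On this encode command list the buffers are then moved from COMMON into
+   /// D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ for the input texture and
+   /// D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE for the compressed output and metadata buffers.)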
After the video work is done in the + /// GPU, transition back to D3D12_RESOURCE_STATE_COMMON + /// + /// Note that unlike the D3D12TranslationLayer codebase, the state tracker here doesn't (yet) have any kind of + /// multi-queue support, so it wouldn't implicitly synchronize when trying to transition between a graphics op and a + /// video op. + /// + + d3d12_transition_resource_state( + d3d12_context(pD3D12Enc->base.context), + pInputVideoBuffer->texture, // d3d12_resource wrapper for pInputVideoD3D12Res + D3D12_RESOURCE_STATE_COMMON, + D3D12_BIND_INVALIDATE_FULL); + d3d12_transition_resource_state(d3d12_context(pD3D12Enc->base.context), + pOutputBitstreamBuffer, // d3d12_resource wrapped for pOutputBufferD3D12Res + D3D12_RESOURCE_STATE_COMMON, + D3D12_BIND_INVALIDATE_FULL); + d3d12_apply_resource_states(d3d12_context(pD3D12Enc->base.context), false); + + d3d12_resource_wait_idle(d3d12_context(pD3D12Enc->base.context), + pInputVideoBuffer->texture, + false /*wantToWrite*/); + d3d12_resource_wait_idle(d3d12_context(pD3D12Enc->base.context), pOutputBitstreamBuffer, true /*wantToWrite*/); + + std::vector rgCurrentFrameStateTransitions = { + CD3DX12_RESOURCE_BARRIER::Transition(pInputVideoD3D12Res, + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ), + CD3DX12_RESOURCE_BARRIER::Transition(pOutputBufferD3D12Res, + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE), + CD3DX12_RESOURCE_BARRIER::Transition(pD3D12Enc->m_spMetadataOutputBuffer.Get(), + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE) + }; + + pD3D12Enc->m_spEncodeCommandList->ResourceBarrier(rgCurrentFrameStateTransitions.size(), + rgCurrentFrameStateTransitions.data()); + + D3D12_VIDEO_ENCODER_RECONSTRUCTED_PICTURE reconPicOutputTextureDesc = + pD3D12Enc->m_upDPBManager->get_current_frame_recon_pic_output_allocation(); + D3D12_VIDEO_ENCODE_REFERENCE_FRAMES referenceFramesDescriptor = + pD3D12Enc->m_upDPBManager->get_current_reference_frames(); + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_FLAGS picCtrlFlags = D3D12_VIDEO_ENCODER_PICTURE_CONTROL_FLAG_NONE; + + // Transition DPB reference pictures to read mode + uint32_t maxReferences = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc); + std::vector rgReferenceTransitions(maxReferences); + if ((referenceFramesDescriptor.NumTexture2Ds > 0) || + (pD3D12Enc->m_upDPBManager->is_current_frame_used_as_reference())) { + rgReferenceTransitions.clear(); + rgReferenceTransitions.reserve(maxReferences); + + // Check if array of textures vs texture array + + if (referenceFramesDescriptor.pSubresources == nullptr) { + + // Array of resources mode for reference pictures + + // Transition all subresources of each reference frame independent resource allocation + for (uint32_t referenceIdx = 0; referenceIdx < referenceFramesDescriptor.NumTexture2Ds; referenceIdx++) { + rgReferenceTransitions.push_back( + CD3DX12_RESOURCE_BARRIER::Transition(referenceFramesDescriptor.ppTexture2Ds[referenceIdx], + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ)); + } + + // Transition all subresources the output recon pic independent resource allocation + if (reconPicOutputTextureDesc.pReconstructedPicture != nullptr) { + picCtrlFlags |= D3D12_VIDEO_ENCODER_PICTURE_CONTROL_FLAG_USED_AS_REFERENCE_PICTURE; + + rgReferenceTransitions.push_back( + CD3DX12_RESOURCE_BARRIER::Transition(reconPicOutputTextureDesc.pReconstructedPicture, + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE)); + } + } else if 
(referenceFramesDescriptor.NumTexture2Ds > 0) { + + // texture array mode for reference pictures + + // In Texture array mode, the dpb storage allocator uses the same texture array for all the input + // reference pics in ppTexture2Ds and also for the pReconstructedPicture output allocations, just different + // subresources. + + CD3DX12_RESOURCE_DESC referencesTexArrayDesc(referenceFramesDescriptor.ppTexture2Ds[0]->GetDesc()); + + for (uint32_t referenceSubresource = 0; referenceSubresource < referencesTexArrayDesc.DepthOrArraySize; + referenceSubresource++) { + + // all reference frames inputs should be all the same texarray allocation + assert(referenceFramesDescriptor.ppTexture2Ds[0] == + referenceFramesDescriptor.ppTexture2Ds[referenceSubresource]); + + // the reconpic output should be all the same texarray allocation + assert(referenceFramesDescriptor.ppTexture2Ds[0] == reconPicOutputTextureDesc.pReconstructedPicture); + + uint32_t MipLevel, PlaneSlice, ArraySlice; + D3D12DecomposeSubresource(referenceSubresource, + referencesTexArrayDesc.MipLevels, + referencesTexArrayDesc.ArraySize(), + MipLevel, + ArraySlice, + PlaneSlice); + + for (PlaneSlice = 0; PlaneSlice < pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.PlaneCount; + PlaneSlice++) { + + uint32_t planeOutputSubresource = + referencesTexArrayDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); + + rgReferenceTransitions.push_back(CD3DX12_RESOURCE_BARRIER::Transition( + // Always same allocation in texarray mode + referenceFramesDescriptor.ppTexture2Ds[0], + D3D12_RESOURCE_STATE_COMMON, + // If this is the subresource for the reconpic output allocation, transition to ENCODE_WRITE + // Otherwise, it's a subresource for an input reference picture, transition to ENCODE_READ + (referenceSubresource == reconPicOutputTextureDesc.ReconstructedPictureSubresource) ? + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE : + D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ, + planeOutputSubresource)); + } + } + } + + if (rgReferenceTransitions.size() > 0) { + pD3D12Enc->m_spEncodeCommandList->ResourceBarrier(static_cast(rgReferenceTransitions.size()), + rgReferenceTransitions.data()); + } + } + + // Update current frame pic params state after reconfiguring above. 
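+   // The DPB manager fills the codec-specific picture control data for this frame below (e.g. the
+   // H264 reference lists and reconstructed picture info), and d3d12_video_encoder_build_codec_headers
+   // returns how many bytes of CPU-built headers (SPS/PPS for H264) were generated; that count
+   // reserves the interval [0, prefixGeneratedHeadersByteSize) at the start of the compressed
+   // bitstream, which buffer_subdata uploads further down while EncodeFrame writes the encoded
+   // payload starting at that same offset.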
+ D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA currentPicParams = + d3d12_video_encoder_get_current_picture_param_settings(pD3D12Enc); + pD3D12Enc->m_upDPBManager->get_current_frame_picture_control_data(currentPicParams); + + uint32_t prefixGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers(pD3D12Enc); + + const D3D12_VIDEO_ENCODER_ENCODEFRAME_INPUT_ARGUMENTS inputStreamArguments = { + // D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_DESC + { // D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAGS + pD3D12Enc->m_currentEncodeConfig.m_seqFlags, + // D3D12_VIDEO_ENCODER_INTRA_REFRESH + pD3D12Enc->m_currentEncodeConfig.m_IntraRefresh, + d3d12_video_encoder_get_current_rate_control_settings(pD3D12Enc), + // D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC + pD3D12Enc->m_currentEncodeConfig.m_currentResolution, + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode, + d3d12_video_encoder_get_current_slice_param_settings(pD3D12Enc), + d3d12_video_encoder_get_current_gop_desc(pD3D12Enc) }, + // D3D12_VIDEO_ENCODER_PICTURE_CONTROL_DESC + { // uint32_t IntraRefreshFrameIndex; + pD3D12Enc->m_currentEncodeConfig.m_IntraRefreshCurrentFrameIndex, + // D3D12_VIDEO_ENCODER_PICTURE_CONTROL_FLAGS Flags; + picCtrlFlags, + // D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA PictureControlCodecData; + currentPicParams, + // D3D12_VIDEO_ENCODE_REFERENCE_FRAMES ReferenceFrames; + referenceFramesDescriptor }, + pInputVideoD3D12Res, + inputVideoD3D12Subresource, + prefixGeneratedHeadersByteSize // hint for driver to know header size in final bitstream for rate control internal + // budgeting. - User can also calculate headers fixed size beforehand (eg. no VUI, + // etc) and build them with final values after EncodeFrame is executed + }; + + const D3D12_VIDEO_ENCODER_ENCODEFRAME_OUTPUT_ARGUMENTS outputStreamArguments = { + // D3D12_VIDEO_ENCODER_COMPRESSED_BITSTREAM + { + pOutputBufferD3D12Res, + prefixGeneratedHeadersByteSize, // Start writing after the reserved interval [0, + // prefixGeneratedHeadersByteSize) for bitstream headers + }, + // D3D12_VIDEO_ENCODER_RECONSTRUCTED_PICTURE + reconPicOutputTextureDesc, + // D3D12_VIDEO_ENCODER_ENCODE_OPERATION_METADATA_BUFFER + { pD3D12Enc->m_spMetadataOutputBuffer.Get(), 0 } + }; + + // Upload the CPU buffers with the bitstream headers to the compressed bitstream resource in the interval [0, + // prefixGeneratedHeadersByteSize) + assert(prefixGeneratedHeadersByteSize == pD3D12Enc->m_BitstreamHeadersBuffer.size()); + + pD3D12Enc->base.context->buffer_subdata( + pD3D12Enc->base.context, // context + destination, // dst buffer - "destination" is the pipe_resource object + // wrapping pOutputBitstreamBuffer and eventually pOutputBufferD3D12Res + PIPE_MAP_WRITE, // usage PIPE_MAP_x + 0, // offset + pD3D12Enc->m_BitstreamHeadersBuffer.size(), + pD3D12Enc->m_BitstreamHeadersBuffer.data()); + + // Note: The buffer_subdata is queued in pD3D12Enc->base.context but doesn't execute immediately + // Will flush and sync this batch in d3d12_video_encoder_flush with the rest of the Video Encode Queue GPU work + + // Record EncodeFrame + pD3D12Enc->m_spEncodeCommandList->EncodeFrame(pD3D12Enc->m_spVideoEncoder.Get(), + pD3D12Enc->m_spVideoEncoderHeap.Get(), + &inputStreamArguments, + &outputStreamArguments); + + D3D12_RESOURCE_BARRIER rgResolveMetadataStateTransitions[] = { + CD3DX12_RESOURCE_BARRIER::Transition(pD3D12Enc->m_spResolvedMetadataBuffer.Get(), + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE), + 
CD3DX12_RESOURCE_BARRIER::Transition(pD3D12Enc->m_spMetadataOutputBuffer.Get(), +                                           D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE, +                                           D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ), +      CD3DX12_RESOURCE_BARRIER::Transition(pInputVideoD3D12Res, +                                           D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ, +                                           D3D12_RESOURCE_STATE_COMMON), +      CD3DX12_RESOURCE_BARRIER::Transition(pOutputBufferD3D12Res, +                                           D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE, +                                           D3D12_RESOURCE_STATE_COMMON) +   }; + +   pD3D12Enc->m_spEncodeCommandList->ResourceBarrier(_countof(rgResolveMetadataStateTransitions), +                                                     rgResolveMetadataStateTransitions); + +   const D3D12_VIDEO_ENCODER_RESOLVE_METADATA_INPUT_ARGUMENTS inputMetadataCmd = { +      pD3D12Enc->m_currentEncodeConfig.m_encoderCodecDesc, +      d3d12_video_encoder_get_current_profile_desc(pD3D12Enc), +      pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format, +      // D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC +      pD3D12Enc->m_currentEncodeConfig.m_currentResolution, +      { pD3D12Enc->m_spMetadataOutputBuffer.Get(), 0 } +   }; + +   const D3D12_VIDEO_ENCODER_RESOLVE_METADATA_OUTPUT_ARGUMENTS outputMetadataCmd = { +      { pD3D12Enc->m_spResolvedMetadataBuffer.Get(), 0 } +   }; +   pD3D12Enc->m_spEncodeCommandList->ResolveEncoderOutputMetadata(&inputMetadataCmd, &outputMetadataCmd); + +   // Transition DPB reference pictures back to COMMON +   if ((referenceFramesDescriptor.NumTexture2Ds > 0) || +       (pD3D12Enc->m_upDPBManager->is_current_frame_used_as_reference())) { +      for (auto &BarrierDesc : rgReferenceTransitions) { +         std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter); +      } + +      if (rgReferenceTransitions.size() > 0) { +         pD3D12Enc->m_spEncodeCommandList->ResourceBarrier(static_cast<uint32_t>(rgReferenceTransitions.size()), +                                                           rgReferenceTransitions.data()); +      } +   } + +   D3D12_RESOURCE_BARRIER rgRevertResolveMetadataStateTransitions[] = { +      CD3DX12_RESOURCE_BARRIER::Transition(pD3D12Enc->m_spResolvedMetadataBuffer.Get(), +                                           D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE, +                                           D3D12_RESOURCE_STATE_COMMON), +      CD3DX12_RESOURCE_BARRIER::Transition(pD3D12Enc->m_spMetadataOutputBuffer.Get(), +                                           D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ, +                                           D3D12_RESOURCE_STATE_COMMON), +   }; + +   pD3D12Enc->m_spEncodeCommandList->ResourceBarrier(_countof(rgRevertResolveMetadataStateTransitions), +                                                     rgRevertResolveMetadataStateTransitions); + +   debug_printf("[d3d12_video_encoder] d3d12_video_encoder_encode_bitstream finalized for fenceValue: %d\n", +                pD3D12Enc->m_fenceValue); +} + +void +d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, unsigned *size) +{ +   struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; +   assert(pD3D12Enc); + +   if (pD3D12Enc->m_needsGPUFlush) { +      d3d12_video_encoder_flush(codec); +   } + +   D3D12_VIDEO_ENCODER_OUTPUT_METADATA encoderMetadata; +   std::vector<D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA> pSubregionsMetadata; +   d3d12_video_encoder_extract_encode_metadata( +      pD3D12Enc, +      pD3D12Enc->m_spResolvedMetadataBuffer.Get(), +      pD3D12Enc->m_currentEncodeCapabilities.m_resolvedLayoutMetadataBufferRequiredSize, +      encoderMetadata, +      pSubregionsMetadata); + +   // Read metadata from encoderMetadata +   if (encoderMetadata.EncodeErrorFlags != D3D12_VIDEO_ENCODER_ENCODE_ERROR_FLAG_NO_ERROR) { +      debug_printf("[d3d12_video_encoder] Encode GPU command failed - EncodeErrorFlags: %ld\n", +                   encoderMetadata.EncodeErrorFlags); +      *size = 0; +   } + +   assert(encoderMetadata.EncodedBitstreamWrittenBytesCount > 0u); +   *size = (pD3D12Enc->m_BitstreamHeadersBuffer.size() + encoderMetadata.EncodedBitstreamWrittenBytesCount); +} + +void 
+d3d12_video_encoder_extract_encode_metadata( +   struct d3d12_video_encoder *                               pD3D12Enc, +   ID3D12Resource *                                           pResolvedMetadataBuffer,   // input +   size_t                                                     resourceMetadataSize,      // input +   D3D12_VIDEO_ENCODER_OUTPUT_METADATA &                      parsedMetadata,            // output +   std::vector<D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA> &pSubregionsMetadata        // output +) +{ +   struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Enc->m_pD3D12Screen; +   assert(pD3D12Screen); +   pipe_resource *pPipeResolvedMetadataBuffer = +      d3d12_resource_from_resource(&pD3D12Screen->base, pResolvedMetadataBuffer); +   assert(pPipeResolvedMetadataBuffer); +   assert(resourceMetadataSize < INT_MAX); +   struct pipe_box box = { +      0,                                        // x +      0,                                        // y +      0,                                        // z +      static_cast<int>(resourceMetadataSize),   // width +      1,                                        // height +      1                                         // depth +   }; +   struct pipe_transfer *mapTransfer; +   unsigned mapUsage = PIPE_MAP_READ; +   void *   pMetadataBufferSrc = pD3D12Enc->base.context->buffer_map(pD3D12Enc->base.context, +                                                                  pPipeResolvedMetadataBuffer, +                                                                  0, +                                                                  mapUsage, +                                                                  &box, +                                                                  &mapTransfer); + +   assert(mapUsage & PIPE_MAP_READ); +   assert(pPipeResolvedMetadataBuffer->usage == PIPE_USAGE_DEFAULT); +   // Note: Since buffer_map is called with PIPE_MAP_READ on pPipeResolvedMetadataBuffer, which has PIPE_USAGE_DEFAULT, +   // buffer_map itself performs all the required synchronization and waits, so once it returns control here +   // the mapped contents are ready to be accessed. + +   // Clear output +   memset(&parsedMetadata, 0, sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA)); + +   // Calculate sizes +   size_t encoderMetadataSize = sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA); + +   // Copy buffer to the appropriate D3D12_VIDEO_ENCODER_OUTPUT_METADATA memory layout +   parsedMetadata = *reinterpret_cast<D3D12_VIDEO_ENCODER_OUTPUT_METADATA *>(pMetadataBufferSrc); + +   // As specified in the D3D12 Encode spec, the array base for the slice metadata +   // (D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA[]) is placed in memory immediately after the +   // D3D12_VIDEO_ENCODER_OUTPUT_METADATA structure +   D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA *pFrameSubregionMetadata = +      reinterpret_cast<D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA *>(reinterpret_cast<uint8_t *>(pMetadataBufferSrc) + +                                                                       encoderMetadataSize); + +   // Copy fields into D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA +   assert(parsedMetadata.WrittenSubregionsCount < SIZE_MAX); +   pSubregionsMetadata.resize(static_cast<size_t>(parsedMetadata.WrittenSubregionsCount)); +   for (uint32_t sliceIdx = 0; sliceIdx < parsedMetadata.WrittenSubregionsCount; sliceIdx++) { +      pSubregionsMetadata[sliceIdx].bHeaderSize = pFrameSubregionMetadata[sliceIdx].bHeaderSize; +      pSubregionsMetadata[sliceIdx].bSize = pFrameSubregionMetadata[sliceIdx].bSize; +      pSubregionsMetadata[sliceIdx].bStartOffset = pFrameSubregionMetadata[sliceIdx].bStartOffset; +   } + +   // Unmap the temporary buffer storage +   pipe_buffer_unmap(pD3D12Enc->base.context, mapTransfer); +} + +/** + * end encoding of the current frame + */ +void +d3d12_video_encoder_end_frame(struct pipe_video_codec * codec, +                              struct pipe_video_buffer *target, +                              struct pipe_picture_desc *picture) +{ +   struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; +   assert(pD3D12Enc); +   debug_printf("[d3d12_video_encoder] d3d12_video_encoder_end_frame started for fenceValue: %d\n", +                pD3D12Enc->m_fenceValue); + +   // Signal finish of current frame encoding to the picture management tracker +   pD3D12Enc->m_upDPBManager->end_frame(); + +   debug_printf("[d3d12_video_encoder] d3d12_video_encoder_end_frame finalized for fenceValue: %d\n", +                pD3D12Enc->m_fenceValue); + +   /// +   /// Flush work to the GPU and blocking wait until encode finishes +   /// +   
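// m_needsGPUFlush marks that encode work has been recorded and is pending submission; the flush below +   // submits it and blocks until the GPU completes, and d3d12_video_encoder_get_feedback checks this same +   // flag before reading back the resolved metadata. +   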
pD3D12Enc->m_needsGPUFlush = true; + d3d12_video_encoder_flush(codec); +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc.h b/src/gallium/drivers/d3d12/d3d12_video_enc.h new file mode 100644 index 00000000000..9870fe4c2f5 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_enc.h @@ -0,0 +1,321 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef D3D12_VIDEO_ENC_H +#define D3D12_VIDEO_ENC_H + +#include "d3d12_video_types.h" +#include "d3d12_video_encoder_references_manager.h" +#include "d3d12_video_dpb_storage_manager.h" +#include "d3d12_video_encoder_bitstream_builder_h264.h" + +/// +/// Pipe video interface starts +/// + +/** + * creates a video encoder + */ +struct pipe_video_codec * +d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pipe_video_codec *templ); + +/** + * destroy this video encoder + */ +void +d3d12_video_encoder_destroy(struct pipe_video_codec *codec); + +/** + * start encoding of a new frame + */ +void +d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); + +/** + * encode to a bitstream + */ +void +d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, + struct pipe_video_buffer *source, + struct pipe_resource * destination, + void ** feedback); + +/** + * get encoder feedback + */ +void +d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, unsigned *size); + +/** + * end encoding of the current frame + */ +void +d3d12_video_encoder_end_frame(struct pipe_video_codec * codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); + +/** + * flush any outstanding command buffers to the hardware + * should be called before a video_buffer is acessed by the gallium frontend again + */ +void +d3d12_video_encoder_flush(struct pipe_video_codec *codec); + +/// +/// Pipe video interface ends +/// + +enum d3d12_video_encoder_config_dirty_flags +{ + d3d12_video_encoder_config_dirty_flag_none = 0x0, + d3d12_video_encoder_config_dirty_flag_codec = 0x1, + d3d12_video_encoder_config_dirty_flag_profile = 0x2, + d3d12_video_encoder_config_dirty_flag_level = 0x4, + d3d12_video_encoder_config_dirty_flag_codec_config = 0x8, + d3d12_video_encoder_config_dirty_flag_input_format = 0x10, + d3d12_video_encoder_config_dirty_flag_resolution = 0x20, + 
d3d12_video_encoder_config_dirty_flag_rate_control = 0x40, + d3d12_video_encoder_config_dirty_flag_slices = 0x80, + d3d12_video_encoder_config_dirty_flag_gop = 0x100, + d3d12_video_encoder_config_dirty_flag_motion_precision_limit = 0x200, +}; +DEFINE_ENUM_FLAG_OPERATORS(d3d12_video_encoder_config_dirty_flags); + +/// +/// d3d12_video_encoder functions starts +/// + +struct d3d12_video_encoder +{ + struct pipe_video_codec base; + struct pipe_screen * m_screen; + struct d3d12_screen * m_pD3D12Screen; + + /// + /// D3D12 objects and context info + /// + + const uint m_NodeMask = 0u; + const uint m_NodeIndex = 0u; + + ComPtr m_spFence; + uint m_fenceValue = 1u; + + ComPtr m_spD3D12VideoDevice; + ComPtr m_spVideoEncoder; + ComPtr m_spVideoEncoderHeap; + ComPtr m_spEncodeCommandQueue; + ComPtr m_spCommandAllocator; + ComPtr m_spEncodeCommandList; + ComPtr m_spCopyQueue; + std::vector m_transitionsBeforeCloseCmdList; + + std::unique_ptr m_upDPBManager; + std::unique_ptr m_upDPBStorageManager; + std::unique_ptr m_upBitstreamBuilder; + + bool m_needsGPUFlush = false; + + ComPtr m_spResolvedMetadataBuffer; + ComPtr m_spMetadataOutputBuffer; + + std::vector m_BitstreamHeadersBuffer; + + struct + { + bool m_fArrayOfTexturesDpb; + + D3D12_VIDEO_ENCODER_SUPPORT_FLAGS m_SupportFlags; + D3D12_VIDEO_ENCODER_VALIDATION_FLAGS m_ValidationFlags; + D3D12_FEATURE_DATA_VIDEO_ENCODER_RESOLUTION_SUPPORT_LIMITS m_currentResolutionSupportCaps; + union + { + D3D12_VIDEO_ENCODER_PROFILE_H264 m_H264Profile; + D3D12_VIDEO_ENCODER_PROFILE_HEVC m_HEVCProfile; + } m_encoderSuggestedProfileDesc = {}; + + union + { + D3D12_VIDEO_ENCODER_LEVELS_H264 m_H264LevelSetting; + D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC m_HEVCLevelSetting; + } m_encoderLevelSuggestedDesc = {}; + + // Required size for the layout-resolved metadata buffer of current frame to be encoded + size_t m_resolvedLayoutMetadataBufferRequiredSize; + + // The maximum number of slices that the output of the current frame to be encoded will contain + uint32_t m_MaxSlicesInOutput; + + D3D12_FEATURE_DATA_VIDEO_ENCODER_RESOURCE_REQUIREMENTS m_ResourceRequirementsCaps; + + } m_currentEncodeCapabilities; + + struct + { + d3d12_video_encoder_config_dirty_flags m_ConfigDirtyFlags = d3d12_video_encoder_config_dirty_flag_none; + + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC m_currentResolution = {}; + D3D12_BOX m_FrameCroppingCodecConfig = {}; + + D3D12_FEATURE_DATA_FORMAT_INFO m_encodeFormatInfo = {}; + + D3D12_VIDEO_ENCODER_CODEC m_encoderCodecDesc = {}; + + D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAGS m_seqFlags = D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAG_NONE; + + /// As the following D3D12 Encode types have pointers in their structures, we need to keep a deep copy of them + + union + { + D3D12_VIDEO_ENCODER_PROFILE_H264 m_H264Profile; + D3D12_VIDEO_ENCODER_PROFILE_HEVC m_HEVCProfile; + } m_encoderProfileDesc = {}; + + union + { + D3D12_VIDEO_ENCODER_LEVELS_H264 m_H264LevelSetting; + D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC m_HEVCLevelSetting; + } m_encoderLevelDesc = {}; + + struct + { + D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE m_Mode; + D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAGS m_Flags; + DXGI_RATIONAL m_FrameRate; + union + { + D3D12_VIDEO_ENCODER_RATE_CONTROL_CQP m_Configuration_CQP; + D3D12_VIDEO_ENCODER_RATE_CONTROL_CBR m_Configuration_CBR; + D3D12_VIDEO_ENCODER_RATE_CONTROL_VBR m_Configuration_VBR; + D3D12_VIDEO_ENCODER_RATE_CONTROL_QVBR m_Configuration_QVBR; + } m_Config; + } m_encoderRateControlDesc = {}; + + union + { + 
D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 m_H264Config; + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_HEVC m_HEVCConfig; + } m_encoderCodecSpecificConfigDesc = {}; + + + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE m_encoderSliceConfigMode; + union + { + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES m_SlicesPartition_H264; + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES m_SlicesPartition_HEVC; + } m_encoderSliceConfigDesc = {}; + + union + { + D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE_H264 m_H264GroupOfPictures; + D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE_HEVC m_HEVCGroupOfPictures; + } m_encoderGOPConfigDesc = {}; + + union + { + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA_H264 m_H264PicData; + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA_HEVC m_HEVCPicData; + } m_encoderPicParamsDesc = {}; + + D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE m_encoderMotionPrecisionLimit = + D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE_MAXIMUM; + + D3D12_VIDEO_ENCODER_INTRA_REFRESH m_IntraRefresh = { D3D12_VIDEO_ENCODER_INTRA_REFRESH_MODE_NONE, 0 }; + uint32_t m_IntraRefreshCurrentFrameIndex = 0; + + } m_currentEncodeConfig; +}; + +bool +d3d12_video_encoder_create_command_objects(struct d3d12_video_encoder *pD3D12Enc); +bool +d3d12_video_encoder_reconfigure_session(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture); +bool +d3d12_video_encoder_update_current_encoder_config_state(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture); +bool +d3d12_video_encoder_reconfigure_encoder_objects(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture); +D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA +d3d12_video_encoder_get_current_picture_param_settings(struct d3d12_video_encoder *pD3D12Enc); +D3D12_VIDEO_ENCODER_LEVEL_SETTING +d3d12_video_encoder_get_current_level_desc(struct d3d12_video_encoder *pD3D12Enc); +D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION +d3d12_video_encoder_get_current_codec_config_desc(struct d3d12_video_encoder *pD3D12Enc); +D3D12_VIDEO_ENCODER_PROFILE_DESC +d3d12_video_encoder_get_current_profile_desc(struct d3d12_video_encoder *pD3D12Enc); +D3D12_VIDEO_ENCODER_RATE_CONTROL +d3d12_video_encoder_get_current_rate_control_settings(struct d3d12_video_encoder *pD3D12Enc); +D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA +d3d12_video_encoder_get_current_slice_param_settings(struct d3d12_video_encoder *pD3D12Enc); +D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE +d3d12_video_encoder_get_current_gop_desc(struct d3d12_video_encoder *pD3D12Enc); +uint32_t +d3d12_video_encoder_get_current_max_dpb_capacity(struct d3d12_video_encoder *pD3D12Enc); +void +d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc); +void +d3d12_video_encoder_update_picparams_tracking(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture); +void +d3d12_video_encoder_calculate_metadata_resolved_buffer_size(uint32_t maxSliceNumber, size_t &bufferSize); +uint32_t +d3d12_video_encoder_calculate_max_slices_count_in_output( + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE slicesMode, + const D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES *slicesConfig, + uint32_t MaxSubregionsNumberFromCaps, + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC sequenceTargetResolution, + 
uint32_t SubregionBlockPixelsSize); +bool +d3d12_video_encoder_prepare_output_buffers(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture); +uint32_t +d3d12_video_encoder_build_codec_headers(struct d3d12_video_encoder *pD3D12Enc); +void +d3d12_video_encoder_extract_encode_metadata( + struct d3d12_video_encoder * pD3D12Dec, + ID3D12Resource * pResolvedMetadataBuffer, + size_t resourceMetadataSize, + D3D12_VIDEO_ENCODER_OUTPUT_METADATA & encoderMetadata, + std::vector &pSubregionsMetadata); + +D3D12_VIDEO_ENCODER_CODEC +d3d12_video_encoder_get_current_codec(struct d3d12_video_encoder *pD3D12Enc); + +bool d3d12_video_encoder_negotiate_requested_features_and_d3d12_driver_caps(struct d3d12_video_encoder *pD3D12Enc, D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT &capEncoderSupportData); +bool d3d12_video_encoder_query_d3d12_driver_caps(struct d3d12_video_encoder *pD3D12Enc, D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT &capEncoderSupportData); +bool d3d12_video_encoder_check_subregion_mode_support(struct d3d12_video_encoder *pD3D12Enc, D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE requestedSlicesMode); + +/// +/// d3d12_video_encoder functions ends +/// + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.cpp new file mode 100644 index 00000000000..226c7bf13bc --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.cpp @@ -0,0 +1,816 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_video_enc.h" +#include "d3d12_video_enc_h264.h" +#include "util/u_video.h" +#include "d3d12_screen.h" +#include "d3d12_format.h" + +void +d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture) +{ + auto previousConfig = pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc; + + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc = {}; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_FrameRate.Numerator = + picture->rate_ctrl[0].frame_rate_num; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_FrameRate.Denominator = + picture->rate_ctrl[0].frame_rate_den; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags = D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_NONE; + + switch (picture->rate_ctrl[0].rate_ctrl_method) { + case PIPE_H2645_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP: + case PIPE_H2645_ENC_RATE_CONTROL_METHOD_VARIABLE: + { + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Mode = D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_VBR; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_VBR.TargetAvgBitRate = + picture->rate_ctrl[0].target_bitrate; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_VBR.PeakBitRate = + picture->rate_ctrl[0].peak_bitrate; + } break; + case PIPE_H2645_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP: + case PIPE_H2645_ENC_RATE_CONTROL_METHOD_CONSTANT: + { + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Mode = D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_CBR; + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.TargetBitRate = + picture->rate_ctrl[0].target_bitrate; + + /* For CBR mode, to guarantee bitrate of generated stream complies with + * target bitrate (e.g. no over +/-10%), vbv_buffer_size should be same + * as target bitrate. 
Controlled by OS env var D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE +          */ +         if (D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE) { +            debug_printf("[d3d12_video_encoder_h264] d3d12_video_encoder_update_current_rate_control_h264 D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE environment variable is set, " +                         "forcing VBV Size = Target Bitrate = %ld (bits)\n", pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.TargetBitRate); +            pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= +               D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_VBV_SIZES; +            pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.VBVCapacity = +               pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CBR.TargetBitRate; +         } + +      } break; +      case PIPE_H2645_ENC_RATE_CONTROL_METHOD_DISABLE: +      { +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Mode = D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_CQP; +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP +            .ConstantQP_FullIntracodedFrame = picture->quant_i_frames; +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP +            .ConstantQP_InterPredictedFrame_PrevRefOnly = picture->quant_p_frames; +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP +            .ConstantQP_InterPredictedFrame_BiDirectionalRef = picture->quant_b_frames; +      } break; +      default: +      { +         debug_printf("[d3d12_video_encoder_h264] d3d12_video_encoder_update_current_rate_control_h264 invalid RC " +                      "config, using default RC CQP mode\n"); +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Mode = D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_CQP; +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP +            .ConstantQP_FullIntracodedFrame = 30; +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP +            .ConstantQP_InterPredictedFrame_PrevRefOnly = 30; +         pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Config.m_Configuration_CQP +            .ConstantQP_InterPredictedFrame_BiDirectionalRef = 30; +      } break; +   } + +   if (memcmp(&previousConfig, +              &pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc, +              sizeof(pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc)) != 0) { +      pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_rate_control; +   } +} + +void +d3d12_video_encoder_update_current_frame_pic_params_info_h264(struct d3d12_video_encoder *pD3D12Enc, +                                                              struct pipe_video_buffer *srcTexture, +                                                              struct pipe_picture_desc *picture, +                                                              D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA &picParams, +                                                              bool &bUsedAsReference) +{ +   struct pipe_h264_enc_picture_desc *h264Pic = (struct pipe_h264_enc_picture_desc *) picture; +   d3d12_video_bitstream_builder_h264 *pH264BitstreamBuilder = +      dynamic_cast<d3d12_video_bitstream_builder_h264 *>(pD3D12Enc->m_upBitstreamBuilder.get()); +   assert(pH264BitstreamBuilder != nullptr); + +   bUsedAsReference = !h264Pic->not_referenced; + +   picParams.pH264PicData->pic_parameter_set_id = pH264BitstreamBuilder->get_active_pps_id(); +   picParams.pH264PicData->idr_pic_id = h264Pic->idr_pic_id; +   picParams.pH264PicData->FrameType = d3d12_video_encoder_convert_frame_type(h264Pic->picture_type); +   picParams.pH264PicData->PictureOrderCountNumber = h264Pic->pic_order_cnt; +   picParams.pH264PicData->FrameDecodingOrderNumber = h264Pic->frame_num; + +   picParams.pH264PicData->List0ReferenceFramesCount = 0; +   
picParams.pH264PicData->pList0ReferenceFrames = nullptr; + picParams.pH264PicData->List1ReferenceFramesCount = 0; + picParams.pH264PicData->pList1ReferenceFrames = nullptr; + + if (picParams.pH264PicData->FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_P_FRAME) { + picParams.pH264PicData->List0ReferenceFramesCount = h264Pic->num_ref_idx_l0_active_minus1 + 1; + picParams.pH264PicData->pList0ReferenceFrames = h264Pic->ref_idx_l0_list; + } else if (picParams.pH264PicData->FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_B_FRAME) { + picParams.pH264PicData->List0ReferenceFramesCount = h264Pic->num_ref_idx_l0_active_minus1 + 1; + picParams.pH264PicData->pList0ReferenceFrames = h264Pic->ref_idx_l0_list; + picParams.pH264PicData->List1ReferenceFramesCount = h264Pic->num_ref_idx_l1_active_minus1 + 1; + picParams.pH264PicData->pList1ReferenceFrames = h264Pic->ref_idx_l1_list; + } +} + +D3D12_VIDEO_ENCODER_FRAME_TYPE_H264 +d3d12_video_encoder_convert_frame_type(enum pipe_h2645_enc_picture_type picType) +{ + switch (picType) { + case PIPE_H2645_ENC_PICTURE_TYPE_P: + { + return D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_P_FRAME; + } break; + case PIPE_H2645_ENC_PICTURE_TYPE_B: + { + return D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_B_FRAME; + } break; + case PIPE_H2645_ENC_PICTURE_TYPE_I: + { + return D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_I_FRAME; + } break; + case PIPE_H2645_ENC_PICTURE_TYPE_IDR: + { + return D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_IDR_FRAME; + } break; + default: + { + unreachable("Unsupported pipe_h2645_enc_picture_type"); + } break; + } +} + +/// +/// Tries to configurate the encoder using the requested slice configuration +/// or falls back to single slice encoding. +/// +bool +d3d12_video_encoder_negotiate_current_h264_slices_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture) +{ + /// + /// Initialize single slice by default + /// + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE requestedSlicesMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME; + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES requestedSlicesConfig = {}; + requestedSlicesConfig.NumberOfSlicesPerFrame = 1; + + /// + /// Try to see if can accomodate for multi-slice request by user + /// + if (picture->num_slice_descriptors > 1) { + /* Last slice can be less for rounding frame size and leave some error for mb rounding */ + bool bUniformSizeSlices = true; + const double rounding_delta = 1.0; + for (uint32_t sliceIdx = 1; (sliceIdx < picture->num_slice_descriptors - 1) && bUniformSizeSlices; sliceIdx++) { + int64_t curSlice = picture->slices_descriptors[sliceIdx].num_macroblocks; + int64_t prevSlice = picture->slices_descriptors[sliceIdx - 1].num_macroblocks; + bUniformSizeSlices = bUniformSizeSlices && (std::abs(curSlice - prevSlice) <= rounding_delta); + } + + uint32_t mbPerScanline = + pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width / D3D12_VIDEO_H264_MB_IN_PIXELS; + bool bSliceAligned = ((picture->slices_descriptors[0].num_macroblocks % mbPerScanline) == 0); + + if (!bUniformSizeSlices && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) { + + if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail + // without support + // Not supported to have custom slice sizes in D3D12 Video Encode fallback to uniform multi-slice + debug_printf( + "[d3d12_video_encoder_h264] WARNING: Requested slice 
control mode is not supported: All slices must " + "have the same number of macroblocks. Falling back to encoding uniform %d slices per frame.\n", + picture->num_slice_descriptors); + requestedSlicesMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME; + requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors; + debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: " + "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME " + "with %d slices per frame.\n", + requestedSlicesConfig.NumberOfSlicesPerFrame); + } else { + debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must " + "have the same number of macroblocks. To continue with uniform slices as a fallback, must " + "enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG"); + return false; + } + } else if (bUniformSizeSlices && bSliceAligned && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_ROWS_PER_SUBREGION)) { + + // Number of macroblocks per slice is aligned to a scanline width, in which case we can + // use D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_ROWS_PER_SUBREGION + requestedSlicesMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_ROWS_PER_SUBREGION; + requestedSlicesConfig.NumberOfRowsPerSlice = (picture->slices_descriptors[0].num_macroblocks / mbPerScanline); + debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: " + "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_ROWS_PER_SUBREGION with " + "%d macroblocks rows per slice.\n", + requestedSlicesConfig.NumberOfRowsPerSlice); + } else if (bUniformSizeSlices && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) { + requestedSlicesMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME; + requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors; + debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: " + "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME " + "with %d slices per frame.\n", + requestedSlicesConfig.NumberOfSlicesPerFrame); + } else if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail + // without support + // Fallback to single slice encoding (assigned by default when initializing variables requestedSlicesMode, + // requestedSlicesConfig) + debug_printf( + "[d3d12_video_encoder_h264] WARNING: Slice mode for %d slices with bUniformSizeSlices: %d - bSliceAligned " + "%d not supported by the D3D12 driver, falling back to encoding a single slice per frame.\n", + picture->num_slice_descriptors, + bUniformSizeSlices, + bSliceAligned); + } else { + debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must " + "have the same number of macroblocks. 
To continue with uniform slices as a fallback, must " + "enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG"); + return false; + } + } + + if (!d3d12_video_encoder_compare_slice_config_h264_hevc( + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode, + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigDesc.m_SlicesPartition_H264, + requestedSlicesMode, + requestedSlicesConfig)) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_slices; + } + + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigDesc.m_SlicesPartition_H264 = requestedSlicesConfig; + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode = requestedSlicesMode; + + return true; +} + +D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE +d3d12_video_encoder_convert_h264_motion_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture) +{ + return D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE_MAXIMUM; +} + +D3D12_VIDEO_ENCODER_LEVELS_H264 +d3d12_video_encoder_convert_level_h264(uint32_t h264SpecLevel) +{ + switch (h264SpecLevel) { + case 10: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_1; + } break; + case 11: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_11; + } break; + case 12: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_12; + } break; + case 13: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_13; + } break; + case 20: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_2; + } break; + case 21: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_21; + } break; + case 22: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_22; + } break; + case 30: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_3; + } break; + case 31: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_31; + } break; + case 32: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_32; + } break; + case 40: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_4; + } break; + case 41: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_41; + } break; + case 42: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_42; + } break; + case 50: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_5; + } break; + case 51: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_51; + } break; + case 52: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_52; + } break; + case 60: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_6; + } break; + case 61: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_61; + } break; + case 62: + { + return D3D12_VIDEO_ENCODER_LEVELS_H264_62; + } break; + default: + { + unreachable("Unsupported H264 level"); + } break; + } +} + +void +d3d12_video_encoder_convert_from_d3d12_level_h264(D3D12_VIDEO_ENCODER_LEVELS_H264 level12, + uint32_t &specLevel, + uint32_t &constraint_set3_flag) +{ + specLevel = 0; + constraint_set3_flag = 0; + + switch (level12) { + case D3D12_VIDEO_ENCODER_LEVELS_H264_1: + { + specLevel = 10; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_1b: + { + specLevel = 11; + constraint_set3_flag = 1; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_11: + { + specLevel = 11; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_12: + { + specLevel = 12; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_13: + { + specLevel = 13; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_2: + { + specLevel = 20; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_21: + { + specLevel = 21; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_22: + { + specLevel = 22; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_3: + { + specLevel = 30; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_31: + { + specLevel = 31; + } 
break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_32: + { + specLevel = 32; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_4: + { + specLevel = 40; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_41: + { + specLevel = 41; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_42: + { + specLevel = 42; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_5: + { + specLevel = 50; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_51: + { + specLevel = 51; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_52: + { + specLevel = 52; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_6: + { + specLevel = 60; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_61: + { + specLevel = 61; + } break; + case D3D12_VIDEO_ENCODER_LEVELS_H264_62: + { + specLevel = 62; + } break; + default: + { + unreachable("Unsupported D3D12_VIDEO_ENCODER_LEVELS_H264 value"); + } break; + } +} + +bool +d3d12_video_encoder_update_h264_gop_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture) +{ + // Only update GOP when it begins + if (picture->gop_cnt == 1) { + uint32_t GOPCoeff = picture->i_remain; + uint32_t GOPLength = picture->gop_size / GOPCoeff; + uint32_t PPicturePeriod = std::ceil(GOPLength / (double) (picture->p_remain / GOPCoeff)) - 1; + + if (picture->pic_order_cnt_type == 1u) { + debug_printf("[d3d12_video_encoder_h264] Upper layer is requesting pic_order_cnt_type %d but D3D12 Video " + "only supports pic_order_cnt_type = 0 or pic_order_cnt_type = 2\n", + picture->pic_order_cnt_type); + return false; + } + + const uint32_t max_pic_order_cnt_lsb = 2 * GOPLength; + const uint32_t max_max_frame_num = GOPLength; + double log2_max_frame_num_minus4 = std::max(0.0, std::ceil(std::log2(max_max_frame_num)) - 4); + double log2_max_pic_order_cnt_lsb_minus4 = std::max(0.0, std::ceil(std::log2(max_pic_order_cnt_lsb)) - 4); + assert(log2_max_frame_num_minus4 < UCHAR_MAX); + assert(log2_max_pic_order_cnt_lsb_minus4 < UCHAR_MAX); + assert(picture->pic_order_cnt_type < UCHAR_MAX); + + // Set dirty flag if m_H264GroupOfPictures changed + auto previousGOPConfig = pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures; + pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures = { + GOPLength, + PPicturePeriod, + static_cast(picture->pic_order_cnt_type), + static_cast(log2_max_frame_num_minus4), + static_cast(log2_max_pic_order_cnt_lsb_minus4) + }; + + if (memcmp(&previousGOPConfig, + &pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures, + sizeof(D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE_H264)) != 0) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_gop; + } + } + return true; +} + +D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 +d3d12_video_encoder_convert_h264_codec_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture) +{ + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 config = { + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_FLAG_NONE, + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_DIRECT_MODES_DISABLED, + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_SLICES_DEBLOCKING_MODE_0_ALL_LUMA_CHROMA_SLICE_BLOCK_EDGES_ALWAYS_FILTERED, + }; + + if (picture->pic_ctrl.enc_cabac_enable) { + config.ConfigurationFlags |= D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_FLAG_ENABLE_CABAC_ENCODING; + } + + return config; +} + +bool +d3d12_video_encoder_update_current_encoder_config_state_h264(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer *srcTexture, 
+ struct pipe_picture_desc *picture) +{ + struct pipe_h264_enc_picture_desc *h264Pic = (struct pipe_h264_enc_picture_desc *) picture; + + // Reset reconfig dirty flags + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags = d3d12_video_encoder_config_dirty_flag_none; + // Reset sequence changes flags + pD3D12Enc->m_currentEncodeConfig.m_seqFlags = D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAG_NONE; + + // Set codec + if (pD3D12Enc->m_currentEncodeConfig.m_encoderCodecDesc != D3D12_VIDEO_ENCODER_CODEC_H264) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_codec; + } + pD3D12Enc->m_currentEncodeConfig.m_encoderCodecDesc = D3D12_VIDEO_ENCODER_CODEC_H264; + + // Set input format + DXGI_FORMAT targetFmt = d3d12_convert_pipe_video_profile_to_dxgi_format(pD3D12Enc->base.profile); + if (pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format != targetFmt) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_input_format; + } + + pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo = {}; + pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format = targetFmt; + HRESULT hr = pD3D12Enc->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, + &pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo, + sizeof(pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo)); + if (FAILED(hr)) { + debug_printf("CheckFeatureSupport failed with HR %x\n", hr); + return false; + } + + // Set resolution + if ((pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width != srcTexture->width) || + (pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Height != srcTexture->height)) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_resolution; + } + pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width = srcTexture->width; + pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Height = srcTexture->height; + + // Set resolution codec dimensions (ie. 
cropping) + if (h264Pic->pic_ctrl.enc_frame_cropping_flag) { + pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig.left = h264Pic->pic_ctrl.enc_frame_crop_left_offset; + pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig.right = h264Pic->pic_ctrl.enc_frame_crop_right_offset; + pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig.top = h264Pic->pic_ctrl.enc_frame_crop_top_offset; + pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig.bottom = + h264Pic->pic_ctrl.enc_frame_crop_bottom_offset; + } else { + memset(&pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig, + 0, + sizeof(pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig)); + } + + // Set profile + auto targetProfile = d3d12_video_encoder_convert_profile_to_d3d12_enc_profile_h264(pD3D12Enc->base.profile); + if (pD3D12Enc->m_currentEncodeConfig.m_encoderProfileDesc.m_H264Profile != targetProfile) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_profile; + } + pD3D12Enc->m_currentEncodeConfig.m_encoderProfileDesc.m_H264Profile = targetProfile; + + // Set level + auto targetLevel = d3d12_video_encoder_convert_level_h264(pD3D12Enc->base.level); + if (pD3D12Enc->m_currentEncodeConfig.m_encoderLevelDesc.m_H264LevelSetting != targetLevel) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_level; + } + pD3D12Enc->m_currentEncodeConfig.m_encoderLevelDesc.m_H264LevelSetting = targetLevel; + + // Set codec config + auto targetCodecConfig = d3d12_video_encoder_convert_h264_codec_configuration(pD3D12Enc, h264Pic); + if (memcmp(&pD3D12Enc->m_currentEncodeConfig.m_encoderCodecSpecificConfigDesc.m_H264Config, + &targetCodecConfig, + sizeof(D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264)) != 0) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= d3d12_video_encoder_config_dirty_flag_codec_config; + } + pD3D12Enc->m_currentEncodeConfig.m_encoderCodecSpecificConfigDesc.m_H264Config = targetCodecConfig; + + // Set rate control + d3d12_video_encoder_update_current_rate_control_h264(pD3D12Enc, h264Pic); + + // Set slices config + if(!d3d12_video_encoder_negotiate_current_h264_slices_configuration(pD3D12Enc, h264Pic)) { + debug_printf("d3d12_video_encoder_negotiate_current_h264_slices_configuration failed!\n"); + return false; + } + + // Set GOP config + if(!d3d12_video_encoder_update_h264_gop_configuration(pD3D12Enc, h264Pic)) { + debug_printf("d3d12_video_encoder_update_h264_gop_configuration failed!\n"); + return false; + } + + // m_currentEncodeConfig.m_encoderPicParamsDesc pic params are set in d3d12_video_encoder_reconfigure_encoder_objects + // after re-allocating objects if needed + + // Set motion estimation config + auto targetMotionLimit = d3d12_video_encoder_convert_h264_motion_configuration(pD3D12Enc, h264Pic); + if (pD3D12Enc->m_currentEncodeConfig.m_encoderMotionPrecisionLimit != targetMotionLimit) { + pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags |= + d3d12_video_encoder_config_dirty_flag_motion_precision_limit; + } + pD3D12Enc->m_currentEncodeConfig.m_encoderMotionPrecisionLimit = targetMotionLimit; + + /// + /// Check for video encode support detailed capabilities + /// + + // Will call for d3d12 driver support based on the initial requested features, then + // try to fallback if any of them is not supported and return the negotiated d3d12 settings + D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT capEncoderSupportData = {}; + if 
(!d3d12_video_encoder_negotiate_requested_features_and_d3d12_driver_caps(pD3D12Enc, capEncoderSupportData)) { + debug_printf("[d3d12_video_encoder_h264] After negotiating caps, D3D12_FEATURE_VIDEO_ENCODER_SUPPORT " + "arguments are not supported - " + "ValidationFlags: 0x%x - SupportFlags: 0x%x\n", + capEncoderSupportData.ValidationFlags, + capEncoderSupportData.SupportFlags); + return false; + } + + /// + // Calculate current settings based on the returned values from the caps query + // + pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput = + d3d12_video_encoder_calculate_max_slices_count_in_output( + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode, + &pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigDesc.m_SlicesPartition_H264, + pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber, + pD3D12Enc->m_currentEncodeConfig.m_currentResolution, + pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.SubregionBlockPixelsSize); + + // + // Validate caps support returned values against current settings + // + if (pD3D12Enc->m_currentEncodeConfig.m_encoderProfileDesc.m_H264Profile != + pD3D12Enc->m_currentEncodeCapabilities.m_encoderSuggestedProfileDesc.m_H264Profile) { + debug_printf("[d3d12_video_encoder_h264] Warning: Requested D3D12_VIDEO_ENCODER_PROFILE_H264 by upper layer: %d " + "mismatches UMD suggested D3D12_VIDEO_ENCODER_PROFILE_H264: %d\n", + pD3D12Enc->m_currentEncodeConfig.m_encoderProfileDesc.m_H264Profile, + pD3D12Enc->m_currentEncodeCapabilities.m_encoderSuggestedProfileDesc.m_H264Profile); + } + + if (pD3D12Enc->m_currentEncodeConfig.m_encoderLevelDesc.m_H264LevelSetting != + pD3D12Enc->m_currentEncodeCapabilities.m_encoderLevelSuggestedDesc.m_H264LevelSetting) { + debug_printf("[d3d12_video_encoder_h264] Warning: Requested D3D12_VIDEO_ENCODER_LEVELS_H264 by upper layer: %d " + "mismatches UMD suggested D3D12_VIDEO_ENCODER_LEVELS_H264: %d\n", + pD3D12Enc->m_currentEncodeConfig.m_encoderLevelDesc.m_H264LevelSetting, + pD3D12Enc->m_currentEncodeCapabilities.m_encoderLevelSuggestedDesc.m_H264LevelSetting); + } + + if (pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput > + pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber) { + debug_printf("[d3d12_video_encoder_h264] Desired number of subregions is not supported (higher than max " + "reported slice number in query caps)\n."); + return false; + } + return true; +} + +D3D12_VIDEO_ENCODER_PROFILE_H264 +d3d12_video_encoder_convert_profile_to_d3d12_enc_profile_h264(enum pipe_video_profile profile) +{ + switch (profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + { + return D3D12_VIDEO_ENCODER_PROFILE_H264_MAIN; + + } break; + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + { + return D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH; + } break; + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10: + { + return D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH_10; + } break; + default: + { + unreachable("Unsupported pipe_video_profile"); + } break; + } +} + +D3D12_VIDEO_ENCODER_CODEC +d3d12_video_encoder_convert_codec_to_d3d12_enc_codec(enum pipe_video_profile profile) +{ + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + { + return D3D12_VIDEO_ENCODER_CODEC_H264; + } break; + case PIPE_VIDEO_FORMAT_HEVC: + { + return D3D12_VIDEO_ENCODER_CODEC_HEVC; + } break; + case PIPE_VIDEO_FORMAT_MPEG12: + case PIPE_VIDEO_FORMAT_MPEG4: 
+ case PIPE_VIDEO_FORMAT_VC1: + case PIPE_VIDEO_FORMAT_JPEG: + case PIPE_VIDEO_FORMAT_VP9: + case PIPE_VIDEO_FORMAT_UNKNOWN: + default: + { + unreachable("Unsupported pipe_video_profile"); + } break; + } +} + +bool +d3d12_video_encoder_compare_slice_config_h264_hevc( + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE targetMode, + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES targetConfig, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE otherMode, + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES otherConfig) +{ + return (targetMode == otherMode) && + (memcmp(&targetConfig, + &otherConfig, + sizeof(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES)) == 0); +} + +uint32_t +d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc) +{ + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA currentPicParams = + d3d12_video_encoder_get_current_picture_param_settings(pD3D12Enc); + + auto profDesc = d3d12_video_encoder_get_current_profile_desc(pD3D12Enc); + auto levelDesc = d3d12_video_encoder_get_current_level_desc(pD3D12Enc); + auto codecConfigDesc = d3d12_video_encoder_get_current_codec_config_desc(pD3D12Enc); + auto MaxDPBCapacity = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc); + + size_t writtenSPSBytesCount = 0; + bool isFirstFrame = (pD3D12Enc->m_fenceValue == 1); + bool writeNewSPS = isFirstFrame // on first frame + || ((pD3D12Enc->m_currentEncodeConfig.m_seqFlags & // also on resolution change + D3D12_VIDEO_ENCODER_SEQUENCE_CONTROL_FLAG_RESOLUTION_CHANGE) != 0); + + d3d12_video_bitstream_builder_h264 *pH264BitstreamBuilder = + dynamic_cast(pD3D12Enc->m_upBitstreamBuilder.get()); + assert(pH264BitstreamBuilder); + + uint32_t active_seq_parameter_set_id = pH264BitstreamBuilder->get_active_sps_id(); + + if (writeNewSPS) { + // For every new SPS for reconfiguration, increase the active_sps_id + if (!isFirstFrame) { + active_seq_parameter_set_id++; + pH264BitstreamBuilder->set_active_sps_id(active_seq_parameter_set_id); + } + pH264BitstreamBuilder->build_sps(*profDesc.pH264Profile, + *levelDesc.pH264LevelSetting, + pD3D12Enc->m_currentEncodeConfig.m_encodeFormatInfo.Format, + *codecConfigDesc.pH264Config, + pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_H264GroupOfPictures, + active_seq_parameter_set_id, + MaxDPBCapacity, // max_num_ref_frames + pD3D12Enc->m_currentEncodeConfig.m_currentResolution, + pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig, + pD3D12Enc->m_BitstreamHeadersBuffer, + pD3D12Enc->m_BitstreamHeadersBuffer.begin(), + writtenSPSBytesCount); + } + + size_t writtenPPSBytesCount = 0; + pH264BitstreamBuilder->build_pps(*profDesc.pH264Profile, + *codecConfigDesc.pH264Config, + *currentPicParams.pH264PicData, + currentPicParams.pH264PicData->pic_parameter_set_id, + active_seq_parameter_set_id, + pD3D12Enc->m_BitstreamHeadersBuffer, + pD3D12Enc->m_BitstreamHeadersBuffer.begin() + writtenSPSBytesCount, + writtenPPSBytesCount); + + // Shrink buffer to fit the headers + if (pD3D12Enc->m_BitstreamHeadersBuffer.size() > (writtenPPSBytesCount + writtenSPSBytesCount)) { + pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenPPSBytesCount + writtenSPSBytesCount); + } + + return pD3D12Enc->m_BitstreamHeadersBuffer.size(); +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_h264.h b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.h new file mode 100644 index 00000000000..68b4d89ee2c --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.h @@ -0,0 +1,67 @@ + +/* + * 
Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef D3D12_VIDEO_ENC_H264_H +#define D3D12_VIDEO_ENC_H264_H +#include "d3d12_video_types.h" + +bool +d3d12_video_encoder_update_current_encoder_config_state_h264(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture); +void +d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture); +bool +d3d12_video_encoder_negotiate_current_h264_slices_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture); +bool +d3d12_video_encoder_update_h264_gop_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture); +D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE +d3d12_video_encoder_convert_h264_motion_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture); +D3D12_VIDEO_ENCODER_LEVELS_H264 +d3d12_video_encoder_convert_level_h264(uint32_t h264SpecLevel); +D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 +d3d12_video_encoder_convert_h264_codec_configuration(struct d3d12_video_encoder *pD3D12Enc, + pipe_h264_enc_picture_desc *picture); +void +d3d12_video_encoder_update_current_frame_pic_params_info_h264(struct d3d12_video_encoder *pD3D12Enc, + struct pipe_video_buffer * srcTexture, + struct pipe_picture_desc * picture, + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA &picParams, + bool &bUsedAsReference); +D3D12_VIDEO_ENCODER_FRAME_TYPE_H264 +d3d12_video_encoder_convert_frame_type(enum pipe_h2645_enc_picture_type picType); +uint32_t +d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc); +bool +d3d12_video_encoder_compare_slice_config_h264_hevc( + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE targetMode, + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES targetConfig, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE otherMode, + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_SLICES otherConfig); + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream.cpp b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream.cpp new file mode 100644 index 00000000000..2625f36ea9b --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream.cpp @@ -0,0 +1,276 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include "d3d12_video_encoder_bitstream.h" + +d3d12_video_encoder_bitstream::d3d12_video_encoder_bitstream() +{ + m_pBitsBuffer = nullptr; + m_uiBitsBufferSize = 0; + m_iBitsToGo = 32; + m_uintEncBuffer = 0; + m_bExternalBuffer = false; + m_bBufferOverflow = false; + m_bPreventStartCode = false; + m_bAllowReallocate = false; +} + +d3d12_video_encoder_bitstream::~d3d12_video_encoder_bitstream() +{ + if (!m_bExternalBuffer) { + if (m_pBitsBuffer) { + delete[](m_pBitsBuffer); + (m_pBitsBuffer) = NULL; + } + } +} + +int32_t +d3d12_video_encoder_bitstream::get_exp_golomb0_code_len(uint32_t uiVal) +{ + int32_t iLen = 0; + uiVal++; + + if (uiVal >= 0x10000) { + uiVal >>= 16; + iLen += 16; + } + if (uiVal >= 0x100) { + uiVal >>= 8; + iLen += 8; + } + + assert(uiVal < 256); + + return iLen + m_iLog_2_N[uiVal]; +} + +void +d3d12_video_encoder_bitstream::exp_Golomb_ue(uint32_t uiVal) +{ + if (uiVal != UINT32_MAX) { + int32_t iLen = get_exp_golomb0_code_len(uiVal); + put_bits((iLen << 1) + 1, uiVal + 1); + } else { + put_bits(32, 0); + put_bits(1, 1); + put_bits(32, 1); + } +} + +void +d3d12_video_encoder_bitstream::exp_Golomb_se(int32_t iVal) +{ + if (iVal > 0) { + exp_Golomb_ue((iVal << 1) - 1); + } else { + exp_Golomb_ue(((-iVal) << 1) - (iVal == INT_MIN)); + } +} + +void +d3d12_video_encoder_bitstream::setup_bitstream(uint32_t uiInitBufferSize, uint8_t *pBuffer) +{ + m_pBitsBuffer = pBuffer; + m_uiBitsBufferSize = uiInitBufferSize; + m_uiOffset = 0; + memset(m_pBitsBuffer, 0, m_uiBitsBufferSize); + m_bExternalBuffer = true; + m_bAllowReallocate = false; +} + +bool +d3d12_video_encoder_bitstream::create_bitstream(uint32_t uiInitBufferSize) +{ + assert((uiInitBufferSize) >= 4 && !(uiInitBufferSize & 3)); + + m_pBitsBuffer = (uint8_t *) new uint8_t[uiInitBufferSize]; + + if (nullptr == m_pBitsBuffer) { + return false; + } + + m_uiBitsBufferSize = uiInitBufferSize; + m_uiOffset = 0; + memset(m_pBitsBuffer, 0, m_uiBitsBufferSize); + m_bExternalBuffer = false; + + return true; +} + +bool +d3d12_video_encoder_bitstream::reallocate_buffer() +{ + uint32_t uiBufferSize = m_uiBitsBufferSize * 3 / 2; + uint8_t *pNewBuffer = (uint8_t *) new uint8_t[uiBufferSize]; + + if (nullptr == pNewBuffer) { + return false; + } + + memcpy(pNewBuffer, m_pBitsBuffer, m_uiOffset * sizeof(uint8_t)); + if (m_pBitsBuffer) { + delete[](m_pBitsBuffer); + (m_pBitsBuffer) = NULL; + } + m_pBitsBuffer = pNewBuffer; + m_uiBitsBufferSize = 
uiBufferSize; + return true; +} + +bool +d3d12_video_encoder_bitstream::verify_buffer(uint32_t uiBytesToWrite) +{ + if (!m_bBufferOverflow) { + if (m_uiOffset + uiBytesToWrite > m_uiBitsBufferSize) { + if (!m_bAllowReallocate || !reallocate_buffer()) { + m_bBufferOverflow = true; + return false; + } + } + + return true; + } + + return false; +} + +void +d3d12_video_encoder_bitstream::inc_current_offset(int32_t dwOffset) +{ + assert(32 == m_iBitsToGo && m_uiOffset < m_uiBitsBufferSize); + m_uiOffset += dwOffset; +} + +void +d3d12_video_encoder_bitstream::get_current_buffer_position_and_size(uint8_t **ppCurrBufPos, int32_t *pdwLeftBufSize) +{ + assert(32 == m_iBitsToGo && m_uiOffset < m_uiBitsBufferSize); + *ppCurrBufPos = m_pBitsBuffer + m_uiOffset; + *pdwLeftBufSize = m_uiBitsBufferSize - m_uiOffset; +} + +void +d3d12_video_encoder_bitstream::attach(uint8_t *pBitsBuffer, uint32_t uiBufferSize) +{ + m_pBitsBuffer = pBitsBuffer; + m_uiBitsBufferSize = uiBufferSize; + m_bExternalBuffer = true; + m_bBufferOverflow = false; + m_bAllowReallocate = false; + + clear(); +} + +void +d3d12_video_encoder_bitstream::write_byte_start_code_prevention(uint8_t u8Val) +{ + int32_t iOffset = m_uiOffset; + uint8_t *pBuffer = m_pBitsBuffer + iOffset; + + if (m_bPreventStartCode && iOffset > 1) { + if (((u8Val & 0xfc) | pBuffer[-2] | pBuffer[-1]) == 0) { + *pBuffer++ = 3; + iOffset++; + } + } + + *pBuffer = u8Val; + iOffset++; + + m_uiOffset = iOffset; +} + +#define WRITE_BYTE(byte) write_byte_start_code_prevention(byte) + +void +d3d12_video_encoder_bitstream::put_bits(int32_t uiBitsCount, uint32_t iBitsVal) +{ + assert(uiBitsCount <= 32); + + if (uiBitsCount < m_iBitsToGo) { + m_uintEncBuffer |= (iBitsVal << (m_iBitsToGo - uiBitsCount)); + m_iBitsToGo -= uiBitsCount; + } else if (verify_buffer(4)) { + int32_t iLeftOverBits = uiBitsCount - m_iBitsToGo; + m_uintEncBuffer |= (iBitsVal >> iLeftOverBits); + + uint8_t *temp = (uint8_t *) (&m_uintEncBuffer); + WRITE_BYTE(*(temp + 3)); + WRITE_BYTE(*(temp + 2)); + WRITE_BYTE(*(temp + 1)); + WRITE_BYTE(*temp); + + m_uintEncBuffer = 0; + m_iBitsToGo = 32 - iLeftOverBits; + + if (iLeftOverBits > 0) { + m_uintEncBuffer = (iBitsVal << (32 - iLeftOverBits)); + } + } +} + +void +d3d12_video_encoder_bitstream::flush() +{ + bool isAligned = is_byte_aligned(); // causes side-effects in object state, don't put inside assert() + assert(isAligned); + + uint32_t temp = (uint32_t)(32 - m_iBitsToGo); + + if (!verify_buffer(temp >> 3)) { + return; + } + + while (temp > 0) { + WRITE_BYTE((uint8_t)(m_uintEncBuffer >> 24)); + m_uintEncBuffer <<= 8; + temp -= 8; + } + + m_iBitsToGo = 32; + m_uintEncBuffer = 0; +} + +void +d3d12_video_encoder_bitstream::append_byte_stream(d3d12_video_encoder_bitstream *pStream) +{ + bool isStreamAligned = + pStream->is_byte_aligned(); // causes side-effects in object state, don't put inside assert() + assert(isStreamAligned); + bool isThisAligned = is_byte_aligned(); // causes side-effects in object state, don't put inside assert() + assert(isThisAligned); + assert(m_iBitsToGo == 32); + + uint8_t *pDst = m_pBitsBuffer + m_uiOffset; + uint8_t *pSrc = pStream->get_bitstream_buffer(); + uint32_t uiLen = (uint32_t) pStream->get_byte_count(); + + if (!verify_buffer(uiLen)) { + return; + } + + memcpy(pDst, pSrc, uiLen); + m_uiOffset += uiLen; +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream.h new file mode 100644 index 00000000000..73fdc632ab6 --- /dev/null +++ 
b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream.h @@ -0,0 +1,119 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef D3D12_VIDEO_ENC_BITSTREAM_H +#define D3D12_VIDEO_ENC_BITSTREAM_H + +#include "d3d12_video_types.h" + +class d3d12_video_encoder_bitstream +{ + public: + d3d12_video_encoder_bitstream(); + ~d3d12_video_encoder_bitstream(); + + public: + void get_current_buffer_position_and_size(uint8_t **ppCurrBufPos, int32_t *pdwLeftBufSize); + void inc_current_offset(int32_t dwOffset); + bool create_bitstream(uint32_t uiInitBufferSize); + void setup_bitstream(uint32_t uiInitBufferSize, uint8_t *pBuffer); + void attach(uint8_t *pBitsBuffer, uint32_t uiBufferSize); + void put_bits(int32_t uiBitsCount, uint32_t iBitsVal); + void flush(); + void exp_Golomb_ue(uint32_t uiVal); + void exp_Golomb_se(int32_t iVal); + + inline void clear() + { + m_iBitsToGo = 32; + m_uiOffset = 0; + m_uintEncBuffer = 0; + }; + + void append_byte_stream(d3d12_video_encoder_bitstream *pStream); + + void set_start_code_prevention(bool bSCP) + { + m_bPreventStartCode = bSCP; + } + int32_t get_bits_count() + { + return m_uiOffset * 8 + (32 - m_iBitsToGo); + } + int32_t get_byte_count() + { + return m_uiOffset + ((32 - m_iBitsToGo) >> 3); + } + uint8_t *get_bitstream_buffer() + { + return m_pBitsBuffer; + } + bool is_byte_aligned() + { + if (m_bBufferOverflow) { + m_iBitsToGo = 32; + } + return !(m_iBitsToGo & 7); + } + int32_t get_num_bits_for_byte_align() + { + return (m_iBitsToGo & 7); + } + bool get_start_code_prevention_status() + { + return m_bPreventStartCode; + } + bool verify_buffer(uint32_t uiBytesToWrite); + + public: + bool m_bBufferOverflow; + bool m_bAllowReallocate; + + private: + void write_byte_start_code_prevention(uint8_t u8Val); + bool reallocate_buffer(); + int32_t get_exp_golomb0_code_len(uint32_t uiVal); + + const uint8_t m_iLog_2_N[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + + private: + uint8_t *m_pBitsBuffer; + uint32_t m_uiBitsBufferSize; + uint32_t m_uiOffset; + + bool m_bExternalBuffer; + uint32_t m_uintEncBuffer; + int32_t m_iBitsToGo; + + bool m_bPreventStartCode; +}; + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder.h new file mode 100644 index 00000000000..d69e45ee542 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder.h @@ -0,0 +1,37 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#ifndef D3D12_VIDEO_ENC_BITSTREAM_BUILDER_H +#define D3D12_VIDEO_ENC_BITSTREAM_BUILDER_H + +#include "d3d12_video_types.h" + +class d3d12_video_bitstream_builder_interface +{ + public: + virtual ~d3d12_video_bitstream_builder_interface() + { } +}; + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.cpp new file mode 100644 index 00000000000..f67ed9e3904 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.cpp @@ -0,0 +1,257 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_video_encoder_bitstream_builder_h264.h" + +inline H264_SPEC_PROFILES +Convert12ToSpecH264Profiles(D3D12_VIDEO_ENCODER_PROFILE_H264 profile12) +{ + switch (profile12) { + case D3D12_VIDEO_ENCODER_PROFILE_H264_MAIN: + { + return H264_PROFILE_MAIN; + } break; + case D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH: + { + return H264_PROFILE_HIGH; + } break; + case D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH_10: + { + return H264_PROFILE_HIGH10; + } break; + default: + { + unreachable("Unsupported D3D12_VIDEO_ENCODER_PROFILE_H264"); + } break; + } +} + +void +d3d12_video_bitstream_builder_h264::build_sps(const D3D12_VIDEO_ENCODER_PROFILE_H264 & profile, + const D3D12_VIDEO_ENCODER_LEVELS_H264 & level, + const DXGI_FORMAT & inputFmt, + const D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 & codecConfig, + const D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE_H264 &gopConfig, + uint32_t seq_parameter_set_id, + uint32_t max_num_ref_frames, + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC sequenceTargetResolution, + D3D12_BOX frame_cropping_codec_config, + std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + H264_SPEC_PROFILES profile_idc = Convert12ToSpecH264Profiles(profile); + uint32_t constraint_set3_flag = 0; + uint32_t level_idc = 0; + d3d12_video_encoder_convert_from_d3d12_level_h264( + level, + level_idc, + constraint_set3_flag /*Always 0 except if level is 11 or 1b in which case 0 means 11, 1 means 1b*/); + + // constraint_set3_flag is for Main profile only and levels 11 or 1b: levels 11 if off, level 1b if on. Always 0 for + // HIGH/HIGH10 profiles + if ((profile == D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH) || (profile == D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH_10)) { + // Force 0 for high profiles + constraint_set3_flag = 0; + } + + assert((inputFmt == DXGI_FORMAT_NV12) || (inputFmt == DXGI_FORMAT_P010)); + + // Assume NV12 YUV 420 8 bits + uint32_t bit_depth_luma_minus8 = 0; + uint32_t bit_depth_chroma_minus8 = 0; + + // In case is 420 10 bits fix it + if (inputFmt == DXGI_FORMAT_P010) { + bit_depth_luma_minus8 = 2; + bit_depth_chroma_minus8 = 2; + } + + // Calculate sequence resolution sizes in MBs + // Always in MBs since we don't support interlace in D3D12 Encode + uint32_t pic_width_in_mbs_minus1 = static_cast(std::ceil(sequenceTargetResolution.Width / 16.0)) - 1; + uint32_t pic_height_in_map_units_minus1 = + static_cast(std::ceil(sequenceTargetResolution.Height / 16.0)) - 1; + + uint32_t frame_cropping_flag = 0; + if (frame_cropping_codec_config.left + || frame_cropping_codec_config.right + || frame_cropping_codec_config.top + || frame_cropping_codec_config.bottom + ) { + frame_cropping_flag = 1; + } + + H264_SPS spsStructure = { static_cast(profile_idc), + constraint_set3_flag, + level_idc, + seq_parameter_set_id, + bit_depth_luma_minus8, + bit_depth_chroma_minus8, + gopConfig.log2_max_frame_num_minus4, + gopConfig.pic_order_cnt_type, + gopConfig.log2_max_pic_order_cnt_lsb_minus4, + max_num_ref_frames, + 0, // gaps_in_frame_num_value_allowed_flag + pic_width_in_mbs_minus1, + pic_height_in_map_units_minus1, + ((codecConfig.ConfigurationFlags & + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_FLAG_USE_ADAPTIVE_8x8_TRANSFORM) != 0) ? 
+ 1u : + 0u, // direct_8x8_inference_flag + frame_cropping_flag, + frame_cropping_codec_config.left, + frame_cropping_codec_config.right, + frame_cropping_codec_config.top, + frame_cropping_codec_config.bottom }; + + // Print built PPS structure + debug_printf( + "[D3D12 d3d12_video_bitstream_builder_h264] H264_SPS Structure generated before writing to bitstream:\n"); + print_sps(spsStructure); + + // Convert the H264 SPS structure into bytes + m_h264Encoder.sps_to_nalu_bytes(&spsStructure, headerBitstream, placingPositionStart, writtenBytes); +} + +void +d3d12_video_bitstream_builder_h264::write_end_of_stream_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + m_h264Encoder.write_end_of_stream_nalu(headerBitstream, placingPositionStart, writtenBytes); +} + +void +d3d12_video_bitstream_builder_h264::write_end_of_sequence_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + m_h264Encoder.write_end_of_sequence_nalu(headerBitstream, placingPositionStart, writtenBytes); +} + +void +d3d12_video_bitstream_builder_h264::build_pps(const D3D12_VIDEO_ENCODER_PROFILE_H264 & profile, + const D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 & codecConfig, + const D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA_H264 &pictureControl, + uint32_t pic_parameter_set_id, + uint32_t seq_parameter_set_id, + std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + BOOL bIsHighProfile = + ((profile == D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH) || (profile == D3D12_VIDEO_ENCODER_PROFILE_H264_HIGH_10)); + + H264_PPS ppsStructure = { + pic_parameter_set_id, + seq_parameter_set_id, + ((codecConfig.ConfigurationFlags & D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_FLAG_ENABLE_CABAC_ENCODING) != 0) ? + 1u : + 0u, // entropy_coding_mode_flag + 0, // pic_order_present_flag (bottom_field_pic_order_in_frame_present_flag) - will use pic_cnt 0 or 2, always + // off ; used with pic_cnt_type 1 and deltas. + static_cast(std::max(static_cast(pictureControl.List0ReferenceFramesCount) - 1, + 0)), // num_ref_idx_l0_active_minus1 + static_cast(std::max(static_cast(pictureControl.List1ReferenceFramesCount) - 1, + 0)), // num_ref_idx_l1_active_minus1 + ((codecConfig.ConfigurationFlags & + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_FLAG_USE_CONSTRAINED_INTRAPREDICTION) != 0) ? + 1u : + 0u, // constrained_intra_pred_flag + ((codecConfig.ConfigurationFlags & + D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264_FLAG_USE_ADAPTIVE_8x8_TRANSFORM) != 0) ? 
+ 1u : + 0u // transform_8x8_mode_flag + }; + + // Print built PPS structure + debug_printf( + "[D3D12 d3d12_video_bitstream_builder_h264] H264_PPS Structure generated before writing to bitstream:\n"); + print_pps(ppsStructure); + + // Convert the H264 SPS structure into bytes + m_h264Encoder.pps_to_nalu_bytes(&ppsStructure, headerBitstream, bIsHighProfile, placingPositionStart, writtenBytes); +} + +void +d3d12_video_bitstream_builder_h264::print_pps(const H264_PPS &pps) +{ + // Be careful that build_pps also wraps some other NALU bytes in pps_to_nalu_bytes so bitstream returned by build_pps + // won't be exactly the bytes from the H264_PPS struct + + static_assert(sizeof(H264_PPS) == + (sizeof(uint32_t) * + 8), "Update the number of uint32_t in struct in assert and add case below if structure changes"); + + // Declared fields from definition in d3d12_video_encoder_bitstream_builder_h264.h + + debug_printf("[D3D12 d3d12_video_bitstream_builder_h264] H264_PPS values below:\n"); + debug_printf("pic_parameter_set_id: %d\n", pps.pic_parameter_set_id); + debug_printf("seq_parameter_set_id: %d\n", pps.seq_parameter_set_id); + debug_printf("entropy_coding_mode_flag: %d\n", pps.entropy_coding_mode_flag); + debug_printf("pic_order_present_flag: %d\n", pps.pic_order_present_flag); + debug_printf("num_ref_idx_l0_active_minus1: %d\n", pps.num_ref_idx_l0_active_minus1); + debug_printf("num_ref_idx_l1_active_minus1: %d\n", pps.num_ref_idx_l1_active_minus1); + debug_printf("constrained_intra_pred_flag: %d\n", pps.constrained_intra_pred_flag); + debug_printf("transform_8x8_mode_flag: %d\n", pps.transform_8x8_mode_flag); + debug_printf( + "[D3D12 d3d12_video_bitstream_builder_h264] H264_PPS values end\n--------------------------------------\n"); +} + +void +d3d12_video_bitstream_builder_h264::print_sps(const H264_SPS &sps) +{ + // Be careful when calling this method that build_sps also wraps some other NALU bytes in sps_to_nalu_bytes so + // bitstream returned by build_sps won't be exactly the bytes from the H264_SPS struct From definition in + // d3d12_video_encoder_bitstream_builder_h264.h + + static_assert(sizeof(H264_SPS) == + (sizeof(uint32_t) * + 19), "Update the number of uint32_t in struct in assert and add case below if structure changes"); + + // Declared fields from definition in d3d12_video_encoder_bitstream_builder_h264.h + + debug_printf("[D3D12 d3d12_video_bitstream_builder_h264] H264_SPS values below:\n"); + debug_printf("profile_idc: %d\n", sps.profile_idc); + debug_printf("constraint_set3_flag: %d\n", sps.constraint_set3_flag); + debug_printf("level_idc: %d\n", sps.level_idc); + debug_printf("seq_parameter_set_id: %d\n", sps.seq_parameter_set_id); + debug_printf("bit_depth_luma_minus8: %d\n", sps.bit_depth_luma_minus8); + debug_printf("bit_depth_chroma_minus8: %d\n", sps.bit_depth_chroma_minus8); + debug_printf("log2_max_frame_num_minus4: %d\n", sps.log2_max_frame_num_minus4); + debug_printf("pic_order_cnt_type: %d\n", sps.pic_order_cnt_type); + debug_printf("log2_max_pic_order_cnt_lsb_minus4: %d\n", sps.log2_max_pic_order_cnt_lsb_minus4); + debug_printf("max_num_ref_frames: %d\n", sps.max_num_ref_frames); + debug_printf("gaps_in_frame_num_value_allowed_flag: %d\n", sps.gaps_in_frame_num_value_allowed_flag); + debug_printf("pic_width_in_mbs_minus1: %d\n", sps.pic_width_in_mbs_minus1); + debug_printf("pic_height_in_map_units_minus1: %d\n", sps.pic_height_in_map_units_minus1); + debug_printf("direct_8x8_inference_flag: %d\n", sps.direct_8x8_inference_flag); + 
debug_printf("frame_cropping_flag: %d\n", sps.frame_cropping_flag); + debug_printf("frame_cropping_rect_left_offset: %d\n", sps.frame_cropping_rect_left_offset); + debug_printf("frame_cropping_rect_right_offset: %d\n", sps.frame_cropping_rect_right_offset); + debug_printf("frame_cropping_rect_top_offset: %d\n", sps.frame_cropping_rect_top_offset); + debug_printf("frame_cropping_rect_bottom_offset: %d\n", sps.frame_cropping_rect_bottom_offset); + debug_printf( + "[D3D12 d3d12_video_bitstream_builder_h264] H264_SPS values end\n--------------------------------------\n"); +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.h new file mode 100644 index 00000000000..ca569a2da4f --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.h @@ -0,0 +1,96 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_VIDEO_ENC_BITSTREAM_BUILDER_H264_H +#define D3D12_VIDEO_ENC_BITSTREAM_BUILDER_H264_H + +#include "d3d12_video_encoder_nalu_writer_h264.h" +#include "d3d12_video_encoder_bitstream_builder.h" + +class d3d12_video_bitstream_builder_h264 : public d3d12_video_bitstream_builder_interface +{ + + public: + d3d12_video_bitstream_builder_h264() {}; + ~d3d12_video_bitstream_builder_h264() {}; + + void build_sps(const D3D12_VIDEO_ENCODER_PROFILE_H264 & profile, + const D3D12_VIDEO_ENCODER_LEVELS_H264 & level, + const DXGI_FORMAT & inputFmt, + const D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 & codecConfig, + const D3D12_VIDEO_ENCODER_SEQUENCE_GOP_STRUCTURE_H264 &gopConfig, + uint32_t seq_parameter_set_id, + uint32_t max_num_ref_frames, + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC sequenceTargetResolution, + D3D12_BOX frame_cropping_codec_config, + std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + + void build_pps(const D3D12_VIDEO_ENCODER_PROFILE_H264 & profile, + const D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 & codecConfig, + const D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA_H264 &pictureControl, + uint32_t pic_parameter_set_id, + uint32_t seq_parameter_set_id, + std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + + void write_end_of_stream_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + void write_end_of_sequence_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + + void print_pps(const H264_PPS &pps); + void print_sps(const H264_SPS &sps); + + uint32_t m_activeSPSIndex = 0; + uint32_t m_activePPSIndex = 0; + + uint32_t get_active_sps_id() + { + return m_activeSPSIndex; + }; + uint32_t get_active_pps_id() + { + return m_activePPSIndex; + }; + + void set_active_sps_id(uint32_t active_sps_id) + { + m_activeSPSIndex = active_sps_id; + debug_printf("[d3d12_video_bitstream_builder_h264] Setting new active SPS ID: %d ", m_activeSPSIndex); + }; + void set_active_pps_id(uint32_t active_pps_id) + { + m_activePPSIndex = active_pps_id; + debug_printf("[d3d12_video_bitstream_builder_h264] Setting new active PPS ID: %d ", m_activePPSIndex); + }; + + private: + d3d12_video_nalu_writer_h264 m_h264Encoder; +}; + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.cpp new file mode 100644 index 00000000000..afd7804ed3b --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.cpp @@ -0,0 +1,400 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_video_encoder_nalu_writer_h264.h" +#include + +void +d3d12_video_nalu_writer_h264::rbsp_trailing(d3d12_video_encoder_bitstream *pBitstream) +{ + pBitstream->put_bits(1, 1); + int32_t iLeft = pBitstream->get_num_bits_for_byte_align(); + + if (iLeft) { + pBitstream->put_bits(iLeft, 0); + } + + bool isAligned = pBitstream->is_byte_aligned(); // causes side-effects in object state, don't put inside assert() + assert(isAligned); +} + +uint32_t +d3d12_video_nalu_writer_h264::write_sps_bytes(d3d12_video_encoder_bitstream *pBitstream, H264_SPS *pSPS) +{ + int32_t iBytesWritten = pBitstream->get_byte_count(); + + // Standard constraint to be between 0 and 31 inclusive + assert(pSPS->seq_parameter_set_id >= 0); + assert(pSPS->seq_parameter_set_id < 32); + + pBitstream->put_bits(8, pSPS->profile_idc); + pBitstream->put_bits(1, 0); // constraint_set0_flag + pBitstream->put_bits(1, 0); // constraint_set1_flag + pBitstream->put_bits(1, 0); // constraint_set2_flag + pBitstream->put_bits(1, pSPS->constraint_set3_flag); + pBitstream->put_bits(1, 0); // constraint_set4_flag + pBitstream->put_bits(1, 0); // constraint_set5_flag + pBitstream->put_bits(2, 0); + pBitstream->put_bits(8, pSPS->level_idc); + pBitstream->exp_Golomb_ue(pSPS->seq_parameter_set_id); + + // Only support profiles defined in D3D12 Video Encode + // If adding new profile support, check that the chroma_format_idc and bit depth are set correctly below + // for the new additions + assert((pSPS->profile_idc == H264_PROFILE_MAIN) || (pSPS->profile_idc == H264_PROFILE_HIGH) || + (pSPS->profile_idc == H264_PROFILE_HIGH10)); + + if ((pSPS->profile_idc == H264_PROFILE_HIGH) || (pSPS->profile_idc == H264_PROFILE_HIGH10)) { + // chroma_format_idc always 4.2.0 + pBitstream->exp_Golomb_ue(1); + // Assume no separate_colour_plane_flag given chroma_format_idc = 1 + pBitstream->exp_Golomb_ue(pSPS->bit_depth_luma_minus8); + pBitstream->exp_Golomb_ue(pSPS->bit_depth_chroma_minus8); + // qpprime_y_zero_transform_bypass_flag + pBitstream->put_bits(1, 0); + // seq_scaling_matrix_present_flag) + pBitstream->put_bits(1, 0); + } + + pBitstream->exp_Golomb_ue(pSPS->log2_max_frame_num_minus4); + + pBitstream->exp_Golomb_ue(pSPS->pic_order_cnt_type); + if (pSPS->pic_order_cnt_type == 0) { + pBitstream->exp_Golomb_ue(pSPS->log2_max_pic_order_cnt_lsb_minus4); + } + pBitstream->exp_Golomb_ue(pSPS->max_num_ref_frames); + pBitstream->put_bits(1, pSPS->gaps_in_frame_num_value_allowed_flag); + pBitstream->exp_Golomb_ue(pSPS->pic_width_in_mbs_minus1); + pBitstream->exp_Golomb_ue(pSPS->pic_height_in_map_units_minus1); + + // No support for interlace in D3D12 Video Encode + // frame_mbs_only_flag coded as 1 + pBitstream->put_bits(1, 1); // frame_mbs_only_flag + pBitstream->put_bits(1, pSPS->direct_8x8_inference_flag); + + // no cropping + pBitstream->put_bits(1, pSPS->frame_cropping_flag); // frame_cropping_flag + if (pSPS->frame_cropping_flag) { + pBitstream->exp_Golomb_ue(pSPS->frame_cropping_rect_left_offset); + 
pBitstream->exp_Golomb_ue(pSPS->frame_cropping_rect_right_offset); + pBitstream->exp_Golomb_ue(pSPS->frame_cropping_rect_top_offset); + pBitstream->exp_Golomb_ue(pSPS->frame_cropping_rect_bottom_offset); + } + + // We're not including the VUI so this better be zero. + pBitstream->put_bits(1, 0); // vui_paramenters_present_flag + + rbsp_trailing(pBitstream); + pBitstream->flush(); + + iBytesWritten = pBitstream->get_byte_count() - iBytesWritten; + return (uint32_t) iBytesWritten; +} + +uint32_t +d3d12_video_nalu_writer_h264::write_pps_bytes(d3d12_video_encoder_bitstream *pBitstream, + H264_PPS * pPPS, + BOOL bIsHighProfile) +{ + int32_t iBytesWritten = pBitstream->get_byte_count(); + + // Standard constraint to be between 0 and 31 inclusive + assert(pPPS->seq_parameter_set_id >= 0); + assert(pPPS->seq_parameter_set_id < 32); + + // Standard constraint to be between 0 and 255 inclusive + assert(pPPS->pic_parameter_set_id >= 0); + assert(pPPS->pic_parameter_set_id < 256); + + pBitstream->exp_Golomb_ue(pPPS->pic_parameter_set_id); + pBitstream->exp_Golomb_ue(pPPS->seq_parameter_set_id); + pBitstream->put_bits(1, pPPS->entropy_coding_mode_flag); + pBitstream->put_bits(1, pPPS->pic_order_present_flag); // bottom_field_pic_order_in_frame_present_flag + pBitstream->exp_Golomb_ue(0); // num_slice_groups_minus1 + + + pBitstream->exp_Golomb_ue(pPPS->num_ref_idx_l0_active_minus1); + pBitstream->exp_Golomb_ue(pPPS->num_ref_idx_l1_active_minus1); + pBitstream->put_bits(1, 0); // weighted_pred_flag + pBitstream->put_bits(2, 0); // weighted_bipred_idc + pBitstream->exp_Golomb_se(0); // pic_init_qp_minus26 + pBitstream->exp_Golomb_se(0); // pic_init_qs_minus26 + pBitstream->exp_Golomb_se(0); // chroma_qp_index_offset + pBitstream->put_bits(1, 1); // deblocking_filter_control_present_flag + pBitstream->put_bits(1, pPPS->constrained_intra_pred_flag); + pBitstream->put_bits(1, 0); // redundant_pic_cnt_present_flag + + if (bIsHighProfile) { + pBitstream->put_bits(1, pPPS->transform_8x8_mode_flag); + pBitstream->put_bits(1, 0); // pic_scaling_matrix_present_flag + pBitstream->exp_Golomb_se(0); // second_chroma_qp_index_offset + } + + rbsp_trailing(pBitstream); + pBitstream->flush(); + + iBytesWritten = pBitstream->get_byte_count() - iBytesWritten; + return (uint32_t) iBytesWritten; +} + +uint32_t +d3d12_video_nalu_writer_h264::wrap_sps_nalu(d3d12_video_encoder_bitstream *pNALU, d3d12_video_encoder_bitstream *pRBSP) +{ + return wrap_rbsp_into_nalu(pNALU, pRBSP, NAL_REFIDC_REF, NAL_TYPE_SPS); +} + +uint32_t +d3d12_video_nalu_writer_h264::wrap_pps_nalu(d3d12_video_encoder_bitstream *pNALU, d3d12_video_encoder_bitstream *pRBSP) +{ + return wrap_rbsp_into_nalu(pNALU, pRBSP, NAL_REFIDC_REF, NAL_TYPE_PPS); +} + +void +d3d12_video_nalu_writer_h264::write_nalu_end(d3d12_video_encoder_bitstream *pNALU) +{ + pNALU->flush(); + pNALU->set_start_code_prevention(FALSE); + int32_t iNALUnitLen = pNALU->get_byte_count(); + + if (FALSE == pNALU->m_bBufferOverflow && 0x00 == pNALU->get_bitstream_buffer()[iNALUnitLen - 1]) { + pNALU->put_bits(8, 0x03); + pNALU->flush(); + } +} + +uint32_t +d3d12_video_nalu_writer_h264::wrap_rbsp_into_nalu(d3d12_video_encoder_bitstream *pNALU, + d3d12_video_encoder_bitstream *pRBSP, + uint32_t iNaluIdc, + uint32_t iNaluType) +{ + bool isAligned = pRBSP->is_byte_aligned(); // causes side-effects in object state, don't put inside assert() + assert(isAligned); + + int32_t iBytesWritten = pNALU->get_byte_count(); + + pNALU->set_start_code_prevention(FALSE); + + // NAL start code + pNALU->put_bits(24, 
0); + pNALU->put_bits(8, 1); + + // NAL header + pNALU->put_bits(1, 0); + pNALU->put_bits(2, iNaluIdc); + pNALU->put_bits(5, iNaluType); + pNALU->flush(); + + // NAL body + pRBSP->flush(); + + if (pRBSP->get_start_code_prevention_status()) { + // Direct copying. + pNALU->append_byte_stream(pRBSP); + } else { + // Copy with start code prevention. + pNALU->set_start_code_prevention(TRUE); + int32_t iLength = pRBSP->get_byte_count(); + uint8_t *pBuffer = pRBSP->get_bitstream_buffer(); + + for (int32_t i = 0; i < iLength; i++) { + pNALU->put_bits(8, pBuffer[i]); + } + } + + isAligned = pNALU->is_byte_aligned(); // causes side-effects in object state, don't put inside assert() + assert(isAligned); + write_nalu_end(pNALU); + + pNALU->flush(); + + iBytesWritten = pNALU->get_byte_count() - iBytesWritten; + return (uint32_t) iBytesWritten; +} + +void +d3d12_video_nalu_writer_h264::sps_to_nalu_bytes(H264_SPS * pSPS, + std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + // Wrap SPS into NALU and copy full NALU into output byte array + d3d12_video_encoder_bitstream rbsp, nalu; + + if (!rbsp.create_bitstream(MAX_COMPRESSED_SPS)) { + debug_printf("rbsp.create_bitstream(MAX_COMPRESSED_SPS) failed\n"); + assert(false); + } + + if (!nalu.create_bitstream(2 * MAX_COMPRESSED_SPS)) { + debug_printf("nalu.create_bitstream(2 * MAX_COMPRESSED_SPS) failed\n"); + assert(false); + } + + rbsp.set_start_code_prevention(TRUE); + if (write_sps_bytes(&rbsp, pSPS) <= 0u) { + debug_printf("write_sps_bytes(&rbsp, pSPS) didn't write any bytes.\n"); + assert(false); + } + + if (wrap_sps_nalu(&nalu, &rbsp) <= 0u) { + debug_printf("wrap_sps_nalu(&nalu, &rbsp) didn't write any bytes.\n"); + assert(false); + } + + // Deep copy nalu into headerBitstream, nalu gets out of scope here and its destructor frees the nalu object buffer + // memory. + uint8_t *naluBytes = nalu.get_bitstream_buffer(); + size_t naluByteSize = nalu.get_byte_count(); + + auto startDstIndex = std::distance(headerBitstream.begin(), placingPositionStart); + if (headerBitstream.size() < (startDstIndex + naluByteSize)) { + headerBitstream.resize(startDstIndex + naluByteSize); + } + + std::copy_n(&naluBytes[0], naluByteSize, &headerBitstream.data()[startDstIndex]); + + writtenBytes = naluByteSize; +} + +void +d3d12_video_nalu_writer_h264::pps_to_nalu_bytes(H264_PPS * pPPS, + std::vector & headerBitstream, + BOOL bIsHighProfile, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + // Wrap PPS into NALU and copy full NALU into output byte array + d3d12_video_encoder_bitstream rbsp, nalu; + if (!rbsp.create_bitstream(MAX_COMPRESSED_PPS)) { + debug_printf("rbsp.create_bitstream(MAX_COMPRESSED_PPS) failed\n"); + assert(false); + } + + if (!nalu.create_bitstream(2 * MAX_COMPRESSED_PPS)) { + debug_printf("nalu.create_bitstream(2 * MAX_COMPRESSED_PPS) failed\n"); + assert(false); + } + + rbsp.set_start_code_prevention(TRUE); + + if (write_pps_bytes(&rbsp, pPPS, bIsHighProfile) <= 0u) { + debug_printf("write_pps_bytes(&rbsp, pPPS, bIsHighProfile) didn't write any bytes.\n"); + assert(false); + } + + if (wrap_pps_nalu(&nalu, &rbsp) <= 0u) { + debug_printf("wrap_pps_nalu(&nalu, &rbsp) didn't write any bytes.\n"); + assert(false); + } + + // Deep copy nalu into headerBitstream, nalu gets out of scope here and its destructor frees the nalu object buffer + // memory. 
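+   // Illustration of the copy pattern used below (hypothetical offsets, not taken from a
+   // specific caller): the NALU bytes are placed at the offset selected by
+   // placingPositionStart, headerBitstream is grown only if the NALU does not fit, and
+   // writtenBytes reports the NALU size; a caller that previously wrote an SPS NALU of N
+   // bytes at headerBitstream.begin() can pass headerBitstream.begin() + N here so the
+   // PPS NALU lands immediately after it.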
+ uint8_t *naluBytes = nalu.get_bitstream_buffer(); + size_t naluByteSize = nalu.get_byte_count(); + + auto startDstIndex = std::distance(headerBitstream.begin(), placingPositionStart); + if (headerBitstream.size() < (startDstIndex + naluByteSize)) { + headerBitstream.resize(startDstIndex + naluByteSize); + } + + std::copy_n(&naluBytes[0], naluByteSize, &headerBitstream.data()[startDstIndex]); + + writtenBytes = naluByteSize; +} + +void +d3d12_video_nalu_writer_h264::write_end_of_stream_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + d3d12_video_encoder_bitstream rbsp, nalu; + if (!rbsp.create_bitstream(8)) { + debug_printf("rbsp.create_bitstream(8) failed\n"); + assert(false); + } + if (!nalu.create_bitstream(2 * MAX_COMPRESSED_PPS)) { + debug_printf("nalu.create_bitstream(2 * MAX_COMPRESSED_PPS) failed\n"); + assert(false); + } + + rbsp.set_start_code_prevention(TRUE); + if (wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_REF, NAL_TYPE_END_OF_STREAM) <= 0u) { + debug_printf( + "wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_REF, NAL_TYPE_END_OF_STREAM) didn't write any bytes.\n");; + assert(false); + } + + // Deep copy nalu into headerBitstream, nalu gets out of scope here and its destructor frees the nalu object buffer + // memory. + uint8_t *naluBytes = nalu.get_bitstream_buffer(); + size_t naluByteSize = nalu.get_byte_count(); + + auto startDstIndex = std::distance(headerBitstream.begin(), placingPositionStart); + if (headerBitstream.size() < (startDstIndex + naluByteSize)) { + headerBitstream.resize(startDstIndex + naluByteSize); + } + + std::copy_n(&naluBytes[0], naluByteSize, &headerBitstream.data()[startDstIndex]); + + writtenBytes = naluByteSize; +} + +void +d3d12_video_nalu_writer_h264::write_end_of_sequence_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + d3d12_video_encoder_bitstream rbsp, nalu; + if (!rbsp.create_bitstream(8)) { + debug_printf("rbsp.create_bitstream(8) failed.\n"); + assert(false); + } + + if (!nalu.create_bitstream(2 * MAX_COMPRESSED_PPS)) { + debug_printf("nalu.create_bitstream(2 * MAX_COMPRESSED_PPS) failed.\n"); + assert(false); + } + + rbsp.set_start_code_prevention(TRUE); + if (wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_REF, NAL_TYPE_END_OF_SEQUENCE) <= 0u) { + + debug_printf( + "wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_REF, NAL_TYPE_END_OF_SEQUENCE) didn't write any bytes.\n"); + assert(false); + } + + // Deep copy nalu into headerBitstream, nalu gets out of scope here and its destructor frees the nalu object buffer + // memory. 
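+   // Since nothing was written into rbsp, the buffer copied below holds only the start
+   // code and the single NAL header byte. With the values passed above (nal_ref_idc = 3,
+   // nal_unit_type = 11) that works out to the five bytes 00 00 00 01 6B.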
+ uint8_t *naluBytes = nalu.get_bitstream_buffer(); + size_t naluByteSize = nalu.get_byte_count(); + + auto startDstIndex = std::distance(headerBitstream.begin(), placingPositionStart); + if (headerBitstream.size() < (startDstIndex + naluByteSize)) { + headerBitstream.resize(startDstIndex + naluByteSize); + } + + std::copy_n(&naluBytes[0], naluByteSize, &headerBitstream.data()[startDstIndex]); + + writtenBytes = naluByteSize; +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.h new file mode 100644 index 00000000000..905adaa57b8 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.h @@ -0,0 +1,151 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_VIDEO_ENC_NALU_WRITER_H264_H +#define D3D12_VIDEO_ENC_NALU_WRITER_H264_H + +#include "d3d12_video_encoder_bitstream.h" + +enum H264_NALREF_IDC +{ + NAL_REFIDC_REF = 3, + NAL_REFIDC_NONREF = 0 +}; + +enum H264_NALU_TYPE +{ + NAL_TYPE_UNSPECIFIED = 0, + NAL_TYPE_SLICE = 1, + NAL_TYPE_SLICEDATA_A = 2, + NAL_TYPE_SLICEDATA_B = 3, + NAL_TYPE_SLICEDATA_C = 4, + NAL_TYPE_IDR = 5, + NAL_TYPE_SEI = 6, + NAL_TYPE_SPS = 7, + NAL_TYPE_PPS = 8, + NAL_TYPE_ACCESS_UNIT_DEMILITER = 9, + NAL_TYPE_END_OF_SEQUENCE = 10, + NAL_TYPE_END_OF_STREAM = 11, + NAL_TYPE_FILLER_DATA = 12, + NAL_TYPE_SPS_EXTENSION = 13, + NAL_TYPE_PREFIX = 14, + /* 15...18 RESERVED */ + NAL_TYPE_AUXILIARY_SLICE = 19, + /* 20...23 RESERVED */ + /* 24...31 UNSPECIFIED */ +}; + +struct H264_SPS +{ + uint32_t profile_idc; + uint32_t constraint_set3_flag; + uint32_t level_idc; + uint32_t seq_parameter_set_id; + uint32_t bit_depth_luma_minus8; + uint32_t bit_depth_chroma_minus8; + uint32_t log2_max_frame_num_minus4; + uint32_t pic_order_cnt_type; + uint32_t log2_max_pic_order_cnt_lsb_minus4; + uint32_t max_num_ref_frames; + uint32_t gaps_in_frame_num_value_allowed_flag; + uint32_t pic_width_in_mbs_minus1; + uint32_t pic_height_in_map_units_minus1; + uint32_t direct_8x8_inference_flag; + uint32_t frame_cropping_flag; + uint32_t frame_cropping_rect_left_offset; + uint32_t frame_cropping_rect_right_offset; + uint32_t frame_cropping_rect_top_offset; + uint32_t frame_cropping_rect_bottom_offset; +}; + +struct H264_PPS +{ + uint32_t pic_parameter_set_id; + uint32_t seq_parameter_set_id; + uint32_t entropy_coding_mode_flag; + uint32_t pic_order_present_flag; + uint32_t num_ref_idx_l0_active_minus1; + uint32_t num_ref_idx_l1_active_minus1; + uint32_t constrained_intra_pred_flag; + uint32_t transform_8x8_mode_flag; +}; + +enum H264_SPEC_PROFILES +{ + H264_PROFILE_MAIN = 77, + H264_PROFILE_HIGH = 100, + H264_PROFILE_HIGH10 = 110, +}; + +#define MAX_COMPRESSED_PPS 256 +#define MAX_COMPRESSED_SPS 256 + +class d3d12_video_nalu_writer_h264 +{ + public: + d3d12_video_nalu_writer_h264() + { } + ~d3d12_video_nalu_writer_h264() + { } + + // Writes the H264 SPS structure into a bitstream passed in headerBitstream + // Function resizes bitstream accordingly and puts result in byte vector + void sps_to_nalu_bytes(H264_SPS * pSPS, + std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + + // Writes the H264 PPS structure into a bitstream passed in headerBitstream + // Function resizes bitstream accordingly and puts result in byte vector + void pps_to_nalu_bytes(H264_PPS * pPPS, + std::vector & headerBitstream, + BOOL bIsFREXTProfile, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + + void write_end_of_stream_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + void write_end_of_sequence_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + + private: + // Writes from structure into bitstream with RBSP trailing but WITHOUT NAL unit wrap (eg. 
nal_idc_type, etc) + uint32_t write_sps_bytes(d3d12_video_encoder_bitstream *pBitstream, H264_SPS *pSPS); + uint32_t write_pps_bytes(d3d12_video_encoder_bitstream *pBitstream, H264_PPS *pPPS, BOOL bIsFREXTProfile); + + // Adds NALU wrapping into structures and ending NALU control bits + uint32_t wrap_sps_nalu(d3d12_video_encoder_bitstream *pNALU, d3d12_video_encoder_bitstream *pRBSP); + uint32_t wrap_pps_nalu(d3d12_video_encoder_bitstream *pNALU, d3d12_video_encoder_bitstream *pRBSP); + + // Helpers + void write_nalu_end(d3d12_video_encoder_bitstream *pNALU); + void rbsp_trailing(d3d12_video_encoder_bitstream *pBitstream); + uint32_t wrap_rbsp_into_nalu(d3d12_video_encoder_bitstream *pNALU, + d3d12_video_encoder_bitstream *pRBSP, + uint32_t iNaluIdc, + uint32_t iNaluType); +}; + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager.h new file mode 100644 index 00000000000..50c95b81a52 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager.h @@ -0,0 +1,44 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + +#ifndef D3D12_VIDEO_ENCODE_REFERENCES_MANAGER_INTERFACE_H +#define D3D12_VIDEO_ENCODE_REFERENCES_MANAGER_INTERFACE_H + +#include "d3d12_video_types.h" + +class d3d12_video_encoder_references_manager_interface +{ + public: + virtual void begin_frame(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA, bool bUsedAsReference) = 0; + virtual void end_frame() = 0; + virtual D3D12_VIDEO_ENCODER_RECONSTRUCTED_PICTURE get_current_frame_recon_pic_output_allocation() = 0; + virtual void + get_current_frame_picture_control_data(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA &codecAllocation) = 0; + virtual bool is_current_frame_used_as_reference() = 0; + virtual D3D12_VIDEO_ENCODE_REFERENCE_FRAMES get_current_reference_frames() = 0; + virtual ~d3d12_video_encoder_references_manager_interface() + { } +}; + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_h264.cpp new file mode 100644 index 00000000000..dbdd7a9e544 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_h264.cpp @@ -0,0 +1,422 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_video_encoder_references_manager_h264.h" +#include +#include +#include "d3d12_screen.h" + +using namespace std; + +d3d12_video_encoder_references_manager_h264::d3d12_video_encoder_references_manager_h264( + bool gopHasIorPFrames, d3d12_video_dpb_storage_manager_interface &rDpbStorageManager, uint32_t MaxDPBCapacity) + : m_MaxDPBCapacity(MaxDPBCapacity), + m_rDPBStorageManager(rDpbStorageManager), + m_CurrentFrameReferencesData({}), + m_gopHasInterFrames(gopHasIorPFrames) +{ + assert((m_MaxDPBCapacity + 1 /*extra for cur frame output recon pic*/) == + m_rDPBStorageManager.get_number_of_tracked_allocations()); + + debug_printf("[D3D12 Video Encoder Picture Manager H264] Completed construction of " + "d3d12_video_encoder_references_manager_h264 instance, settings are\n"); + debug_printf("[D3D12 Video Encoder Picture Manager H264] m_MaxDPBCapacity: %d\n", m_MaxDPBCapacity); +} + +void +d3d12_video_encoder_references_manager_h264::reset_gop_tracking_and_dpb() +{ + // Reset m_CurrentFrameReferencesData tracking + m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.clear(); + m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.reserve(m_MaxDPBCapacity); + m_CurrentFrameReferencesData.ReconstructedPicTexture = { nullptr, 0 }; + + // Reset DPB storage + uint32_t numPicsBeforeClearInDPB = m_rDPBStorageManager.get_number_of_pics_in_dpb(); + uint32_t cFreedResources = m_rDPBStorageManager.clear_decode_picture_buffer(); + assert(numPicsBeforeClearInDPB == cFreedResources); + + // Initialize if needed the reconstructed picture allocation for the first IDR picture in the GOP + // This needs to be done after initializing the GOP tracking state above since it makes decisions based on the + // current picture type. + prepare_current_frame_recon_pic_allocation(); + + // After clearing the DPB, outstanding used allocations should be 1u only for the first allocation for the + // reconstructed picture of the initial IDR in the GOP + assert(m_rDPBStorageManager.get_number_of_in_use_allocations() == m_gopHasInterFrames ? 
1u : 0u); + assert(m_rDPBStorageManager.get_number_of_tracked_allocations() <= + (m_MaxDPBCapacity + 1)); // pool is not extended beyond maximum expected usage +} + +// Calculates the picture control structure for the current frame +void +d3d12_video_encoder_references_manager_h264::get_current_frame_picture_control_data( + D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA &codecAllocation) +{ + // Update reference picture control structures (L0/L1 and DPB descriptors lists based on current frame and next frame + // in GOP) for next frame + + debug_printf("[D3D12 Video Encoder Picture Manager H264] %d resources IN USE out of a total of %d ALLOCATED " + "resources at frame with POC: %d\n", + m_rDPBStorageManager.get_number_of_in_use_allocations(), + m_rDPBStorageManager.get_number_of_tracked_allocations(), + m_curFrameState.PictureOrderCountNumber); + + // See casts below + assert(m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.size() < UINT32_MAX); + + bool needsL0List = (m_curFrameState.FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_P_FRAME) || + (m_curFrameState.FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_B_FRAME); + bool needsL1List = (m_curFrameState.FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_B_FRAME); + + assert(codecAllocation.DataSize == sizeof(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA_H264)); + + // See D3D12 Encode spec below + // pList0ReferenceFrames + // List of past frame reference frames to be used for this frame. Each integer value in this array indices into + // pReferenceFramesReconPictureDescriptors to reference pictures kept in the DPB. + // pList1ReferenceFrames + // List of future frame reference frames to be used for this frame. Each integer value in this array indices into + // pReferenceFramesReconPictureDescriptors to reference pictures kept in the DPB. 
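+   // Hypothetical example of the remapping performed below: if
+   // pReferenceFramesReconPictureDescriptors holds FrameDecodingOrderNumber values
+   // { 4, 3, 2 } at indices { 0, 1, 2 } (most recently encoded first) and the incoming
+   // pList0ReferenceFrames contains the frame_num values { 3, 4 }, the list is rewritten
+   // in place to the descriptor indices { 1, 0 }.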
+ + // Need to map from frame_num in the receiving ref_idx_l0_list/ref_idx_l1_list to the position with that + // FrameDecodingOrderNumber in the DPB descriptor + + if (needsL0List && (m_curFrameState.List0ReferenceFramesCount > 0)) { + std::vector tmpL0(m_curFrameState.List0ReferenceFramesCount, 0); + memcpy(tmpL0.data(), + m_curFrameState.pList0ReferenceFrames, + m_curFrameState.List0ReferenceFramesCount * sizeof(m_curFrameState.pList0ReferenceFrames[0])); + + for (size_t l0Idx = 0; l0Idx < m_curFrameState.List0ReferenceFramesCount; l0Idx++) { + // tmpL0[l0Idx] has frame_num's (FrameDecodingOrderNumber) + // m_curFrameState.pList0ReferenceFrames[l0Idx] needs to have the index j of + // pReferenceFramesReconPictureDescriptors where + // pReferenceFramesReconPictureDescriptors[j].FrameDecodingOrderNumber == tmpL0[l0Idx] + + auto value = tmpL0[l0Idx]; + auto foundItemIt = std::find_if(m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.begin(), + m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.end(), + [&value](const D3D12_VIDEO_ENCODER_REFERENCE_PICTURE_DESCRIPTOR_H264 &p) { + return p.FrameDecodingOrderNumber == value; + }); + + assert(foundItemIt != m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.end()); + m_curFrameState.pList0ReferenceFrames[l0Idx] = + std::distance(m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.begin(), foundItemIt); + } + } + + if (needsL1List && (m_curFrameState.List1ReferenceFramesCount > 0)) { + std::vector tmpL1(m_curFrameState.List1ReferenceFramesCount, 0); + memcpy(tmpL1.data(), + m_curFrameState.pList1ReferenceFrames, + m_curFrameState.List1ReferenceFramesCount * sizeof(m_curFrameState.pList1ReferenceFrames[0])); + + for (size_t l1Idx = 0; l1Idx < m_curFrameState.List1ReferenceFramesCount; l1Idx++) { + // tmpL1[l1Idx] has frame_num's (FrameDecodingOrderNumber) + // m_curFrameState.pList1ReferenceFrames[l1Idx] needs to have the index j of + // pReferenceFramesReconPictureDescriptors where + // pReferenceFramesReconPictureDescriptors[j].FrameDecodingOrderNumber == tmpL1[l1Idx] + + auto value = tmpL1[l1Idx]; + auto foundItemIt = std::find_if(m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.begin(), + m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.end(), + [&value](const D3D12_VIDEO_ENCODER_REFERENCE_PICTURE_DESCRIPTOR_H264 &p) { + return p.FrameDecodingOrderNumber == value; + }); + + assert(foundItemIt != m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.end()); + m_curFrameState.pList1ReferenceFrames[l1Idx] = + std::distance(m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.begin(), foundItemIt); + } + } + + m_curFrameState.List0ReferenceFramesCount = needsL0List ? m_curFrameState.List0ReferenceFramesCount : 0; + m_curFrameState.pList0ReferenceFrames = needsL0List ? m_curFrameState.pList0ReferenceFrames : nullptr, + m_curFrameState.List1ReferenceFramesCount = needsL1List ? m_curFrameState.List1ReferenceFramesCount : 0, + m_curFrameState.pList1ReferenceFrames = needsL1List ? m_curFrameState.pList1ReferenceFrames : nullptr, + m_curFrameState.ReferenceFramesReconPictureDescriptorsCount = + needsL0List ? static_cast(m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.size()) : + 0, + m_curFrameState.pReferenceFramesReconPictureDescriptors = + needsL0List ? 
m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.data() : nullptr, + + *codecAllocation.pH264PicData = m_curFrameState; + + print_l0_l1_lists(); + print_dpb(); +} + +// Returns the resource allocation for a reconstructed picture output for the current frame +D3D12_VIDEO_ENCODER_RECONSTRUCTED_PICTURE +d3d12_video_encoder_references_manager_h264::get_current_frame_recon_pic_output_allocation() +{ + return m_CurrentFrameReferencesData.ReconstructedPicTexture; +} + +D3D12_VIDEO_ENCODE_REFERENCE_FRAMES +d3d12_video_encoder_references_manager_h264::get_current_reference_frames() +{ + D3D12_VIDEO_ENCODE_REFERENCE_FRAMES retVal = { 0, + // ppTexture2Ds + nullptr, + // pSubresources + nullptr }; + + // Return nullptr for fully intra frames (eg IDR) + // and return references information for inter frames (eg.P/B) and I frame that doesn't flush DPB + + if ((m_curFrameState.FrameType != D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_IDR_FRAME) && + (m_curFrameState.FrameType != D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_I_FRAME) && m_gopHasInterFrames) { + auto curRef = m_rDPBStorageManager.get_current_reference_frames(); + retVal.NumTexture2Ds = curRef.NumTexture2Ds; + retVal.ppTexture2Ds = curRef.ppTexture2Ds; + retVal.pSubresources = curRef.pSubresources; + } + + return retVal; +} + +void +d3d12_video_encoder_references_manager_h264::prepare_current_frame_recon_pic_allocation() +{ + m_CurrentFrameReferencesData.ReconstructedPicTexture = { nullptr, 0 }; + + // If all GOP are intra frames, no point in doing reference pic allocations + if (is_current_frame_used_as_reference() && m_gopHasInterFrames) { + auto reconPic = m_rDPBStorageManager.get_new_tracked_picture_allocation(); + m_CurrentFrameReferencesData.ReconstructedPicTexture.pReconstructedPicture = reconPic.pReconstructedPicture; + m_CurrentFrameReferencesData.ReconstructedPicTexture.ReconstructedPictureSubresource = + reconPic.ReconstructedPictureSubresource; + } +} + +void +d3d12_video_encoder_references_manager_h264::update_fifo_dpb_push_front_cur_recon_pic() +{ + // Keep the order of the dpb storage and dpb descriptors in a circular buffer + // order such that the DPB array consists of a sequence of frames in DECREASING encoding order + // eg. last frame encoded at first, followed by one to last frames encoded, and at the end + // the most distant frame encoded (currentFrameEncodeOrderNumber - MaxDPBSize) + + // If current pic was not used as reference, current reconstructed picture resource is empty, + // No need to to anything in that case. + // Otherwise extract the reconstructed picture result and add it to the DPB + + // If GOP are all intra frames, do nothing also. 
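+   // Illustration with assumed values: with m_MaxDPBCapacity = 2 and frames encoded in
+   // decode order 1, 2, 3 (all used as reference), the DPB descriptors read { 2, 1 }
+   // before this call for frame 3; the oldest entry (1) is evicted, the new reconstructed
+   // picture is inserted at the front, and the descriptors end up as { 3, 2 } with
+   // ReconstructedPictureResourceIndex equal to each entry's position in the list.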
+   if (is_current_frame_used_as_reference() && m_gopHasInterFrames) {
+      debug_printf("[D3D12 Video Encoder Picture Manager H264] MaxDPBCapacity is %d - Number of pics in DPB is %d "
+                   "when trying to put frame with POC %d at front of the DPB\n",
+                   m_MaxDPBCapacity,
+                   m_rDPBStorageManager.get_number_of_pics_in_dpb(),
+                   m_curFrameState.PictureOrderCountNumber);
+
+      // Release the least recently used DPB entry if we already filled the allowed m_MaxDPBCapacity
+      if (m_rDPBStorageManager.get_number_of_pics_in_dpb() == m_MaxDPBCapacity) {
+         bool untrackedRes = false;
+         m_rDPBStorageManager.remove_reference_frame(m_rDPBStorageManager.get_number_of_pics_in_dpb() - 1,
+                                                     &untrackedRes);   // Remove last entry
+         // Verify that the resource was untracked since this class is using the pool completely for allocations
+         assert(untrackedRes);
+         m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.pop_back();   // Remove last entry
+      }
+
+      // Add the new entry to the front of the DPB
+      D3D12_VIDEO_ENCODER_RECONSTRUCTED_PICTURE recAlloc = get_current_frame_recon_pic_output_allocation();
+      d3d12_video_reconstructed_picture refFrameDesc = {};
+      refFrameDesc.pReconstructedPicture = recAlloc.pReconstructedPicture;
+      refFrameDesc.ReconstructedPictureSubresource = recAlloc.ReconstructedPictureSubresource;
+      refFrameDesc.pVideoHeap = nullptr;   // D3D12 Video Encode does not need the D3D12VideoEncoderHeap struct for H264
+                                           // (used for no-key-frame resolution change in VC1, AV1, etc)
+      m_rDPBStorageManager.insert_reference_frame(refFrameDesc, 0);
+
+      // Prepare D3D12_VIDEO_ENCODER_REFERENCE_PICTURE_DESCRIPTOR_H264 for the added DPB member
+      D3D12_VIDEO_ENCODER_REFERENCE_PICTURE_DESCRIPTOR_H264 newDPBDescriptor = {
+         // uint32_t ReconstructedPictureResourceIndex;
+         0,   // the associated reconstructed picture is also being pushed_front in m_rDPBStorageManager
+         // BOOL IsLongTermReference;
+         false,
+         // uint32_t LongTermPictureIdx;
+         0,
+         // uint32_t PictureOrderCountNumber;
+         m_curFrameState.PictureOrderCountNumber,
+         // uint32_t FrameDecodingOrderNumber;
+         m_curFrameState.FrameDecodingOrderNumber,
+         // uint32_t TemporalLayerIndex;
+         0   // No B-hierarchy in this implementation of the picture manager
+      };
+
+      // Add DPB entry
+      m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.insert(
+         m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.begin(),
+         newDPBDescriptor);
+
+      // Update the indices for ReconstructedPictureResourceIndex in pReferenceFramesReconPictureDescriptors
+      // to be in identity mapping with the m_rDPBStorageManager indices
+      // after pushing the elements to the right in the push_front operation
+      for (uint32_t dpbResIdx = 1;
+           dpbResIdx < m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.size();
+           dpbResIdx++) {
+         auto &dpbDesc = m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors[dpbResIdx];
+         dpbDesc.ReconstructedPictureResourceIndex = dpbResIdx;
+      }
+   }
+
+   // The number of allocations, whether currently in use or not, should not exceed this limit because
+   // allocations are reused when DPB items are removed.
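+   // Note: the +1 accounts for the allocation grabbed in prepare_current_frame_recon_pic_allocation()
+   // for the current frame's reconstructed picture, which is requested before the oldest DPB entry is
+   // evicted and its allocation recycled back into the pool.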
+   assert(m_rDPBStorageManager.get_number_of_tracked_allocations() <= (m_MaxDPBCapacity + 1));
+}
+
+void
+d3d12_video_encoder_references_manager_h264::print_l0_l1_lists()
+{
+   if ((D3D12_DEBUG_VERBOSE & d3d12_debug) &&
+       ((m_curFrameState.FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_P_FRAME) ||
+        (m_curFrameState.FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_B_FRAME))) {
+      std::string list0ContentsString;
+      for (uint32_t idx = 0; idx < m_curFrameState.List0ReferenceFramesCount; idx++) {
+         uint32_t value = m_curFrameState.pList0ReferenceFrames[idx];
+         list0ContentsString += "{ DPBidx: ";
+         list0ContentsString += std::to_string(value);
+         list0ContentsString += " - POC: ";
+         list0ContentsString += std::to_string(
+            m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors[value].PictureOrderCountNumber);
+         list0ContentsString += " - FrameDecodingOrderNumber: ";
+         list0ContentsString += std::to_string(
+            m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors[value].FrameDecodingOrderNumber);
+         list0ContentsString += "}\n";
+      }
+
+      debug_printf(
+         "[D3D12 Video Encoder Picture Manager H264] L0 list for frame with POC %d - frame_num (%d) is: \n %s \n",
+         m_curFrameState.PictureOrderCountNumber,
+         m_curFrameState.FrameDecodingOrderNumber,
+         list0ContentsString.c_str());
+
+      std::string list1ContentsString;
+      for (uint32_t idx = 0; idx < m_curFrameState.List1ReferenceFramesCount; idx++) {
+         uint32_t value = m_curFrameState.pList1ReferenceFrames[idx];
+         list1ContentsString += "{ DPBidx: ";
+         list1ContentsString += std::to_string(value);
+         list1ContentsString += " - POC: ";
+         list1ContentsString += std::to_string(
+            m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors[value].PictureOrderCountNumber);
+         list1ContentsString += " - FrameDecodingOrderNumber: ";
+         list1ContentsString += std::to_string(
+            m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors[value].FrameDecodingOrderNumber);
+         list1ContentsString += "}\n";
+      }
+
+      debug_printf(
+         "[D3D12 Video Encoder Picture Manager H264] L1 list for frame with POC %d - frame_num (%d) is: \n %s \n",
+         m_curFrameState.PictureOrderCountNumber,
+         m_curFrameState.FrameDecodingOrderNumber,
+         list1ContentsString.c_str());
+   }
+}
+
+void
+d3d12_video_encoder_references_manager_h264::print_dpb()
+{
+   if (D3D12_DEBUG_VERBOSE & d3d12_debug) {
+      std::string dpbContents;
+      for (uint32_t dpbResIdx = 0;
+           dpbResIdx < m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors.size();
+           dpbResIdx++) {
+         auto &dpbDesc = m_CurrentFrameReferencesData.pReferenceFramesReconPictureDescriptors[dpbResIdx];
+         auto dpbEntry = m_rDPBStorageManager.get_reference_frame(dpbDesc.ReconstructedPictureResourceIndex);
+
+         dpbContents += "{ DPBidx: ";
+         dpbContents += std::to_string(dpbResIdx);
+         dpbContents += " - POC: ";
+         dpbContents += std::to_string(dpbDesc.PictureOrderCountNumber);
+         dpbContents += " - FrameDecodingOrderNumber: ";
+         dpbContents += std::to_string(dpbDesc.FrameDecodingOrderNumber);
+         dpbContents += " - DPBStorageIdx: ";
+         dpbContents += std::to_string(dpbDesc.ReconstructedPictureResourceIndex);
+         dpbContents += " - DPBStorageResourcePtr: ";
+         char strBuf[256];
+         memset(&strBuf, '\0', 256);
+         sprintf(strBuf, "%p", dpbEntry.pReconstructedPicture);
+         dpbContents += std::string(strBuf);
+         dpbContents += " - DPBStorageSubresource: ";
+         dpbContents += std::to_string(dpbEntry.ReconstructedPictureSubresource);
+         dpbContents += "}\n";
+      }
+
+      debug_printf("[D3D12 Video Encoder Picture Manager H264] DPB has %d frames - DPB references for frame with POC "
+                   "%d (frame_num: %d) are: \n %s \n",
+                   m_rDPBStorageManager.get_number_of_pics_in_dpb(),
+                   m_curFrameState.PictureOrderCountNumber,
+                   m_curFrameState.FrameDecodingOrderNumber,
+                   dpbContents.c_str());
+   }
+}
+
+// Advances state to next frame in GOP; subsequent calls to GetCurrentFrame* point to the advanced frame status
+void
+d3d12_video_encoder_references_manager_h264::end_frame()
+{
+   debug_printf("[D3D12 Video Encoder Picture Manager H264] %d resources IN USE out of a total of %d ALLOCATED "
+                "resources at end_frame for frame with POC: %d\n",
+                m_rDPBStorageManager.get_number_of_in_use_allocations(),
+                m_rDPBStorageManager.get_number_of_tracked_allocations(),
+                m_curFrameState.PictureOrderCountNumber);
+
+   // Adds the last used (if not null) get_current_frame_recon_pic_output_allocation to the DPB for the next
+   // EncodeFrame and, if necessary, updates pReferenceFramesReconPictureDescriptors and the dpb storage
+
+   update_fifo_dpb_push_front_cur_recon_pic();
+}
+
+bool
+d3d12_video_encoder_references_manager_h264::is_current_frame_used_as_reference()
+{
+   return m_isCurrentFrameUsedAsReference;
+}
+
+void
+d3d12_video_encoder_references_manager_h264::begin_frame(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA curFrameData,
+                                                         bool bUsedAsReference)
+{
+   m_curFrameState = *curFrameData.pH264PicData;
+   m_isCurrentFrameUsedAsReference = bUsedAsReference;
+   debug_printf("Marking frame_num %d (POC %d) as reference ? %d\n",
+                curFrameData.pH264PicData->FrameDecodingOrderNumber,
+                curFrameData.pH264PicData->PictureOrderCountNumber,
+                bUsedAsReference);
+
+   // Advance the GOP tracking state
+   bool isDPBFlushNeeded = (m_curFrameState.FrameType == D3D12_VIDEO_ENCODER_FRAME_TYPE_H264_IDR_FRAME);
+   if (isDPBFlushNeeded) {
+      reset_gop_tracking_and_dpb();
+   } else {
+      // Get a new allocation from the DPB storage for the reconstructed picture
+      // This is only necessary for the frames that come after an IDR,
+      // since the initial state already has this initialized
+      // and it is re-initialized by reset_gop_tracking_and_dpb above
+
+      prepare_current_frame_recon_pic_allocation();
+   }
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_h264.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_h264.h
new file mode 100644
index 00000000000..c99db528aec
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_references_manager_h264.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_VIDEO_ENCODE_FIFO_REFERENCES_MANAGER_H264_H
+#define D3D12_VIDEO_ENCODE_FIFO_REFERENCES_MANAGER_H264_H
+
+#include "d3d12_video_types.h"
+#include "d3d12_video_encoder_references_manager.h"
+#include "d3d12_video_dpb_storage_manager.h"
+
+class d3d12_video_encoder_references_manager_h264 : public d3d12_video_encoder_references_manager_interface
+{
+ public:
+   void end_frame();
+   void begin_frame(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA curFrameData, bool bUsedAsReference);
+   D3D12_VIDEO_ENCODER_RECONSTRUCTED_PICTURE get_current_frame_recon_pic_output_allocation();
+   void get_current_frame_picture_control_data(D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA &codecAllocation);
+   bool is_current_frame_used_as_reference();
+   D3D12_VIDEO_ENCODE_REFERENCE_FRAMES get_current_reference_frames();
+
+   d3d12_video_encoder_references_manager_h264(bool gopHasInterCodedFrames,
+                                               d3d12_video_dpb_storage_manager_interface &rDpbStorageManager,
+                                               uint32_t MaxDPBCapacity);
+
+   ~d3d12_video_encoder_references_manager_h264()
+   { }
+
+ private:
+   // Class helpers
+   void prepare_current_frame_recon_pic_allocation();
+   void reset_gop_tracking_and_dpb();
+   void update_fifo_dpb_push_front_cur_recon_pic();
+   void print_dpb();
+   void print_l0_l1_lists();
+
+   // Class members
+
+   uint32_t m_MaxDPBCapacity = 0;
+
+   struct current_frame_references_data
+   {
+      std::vector<D3D12_VIDEO_ENCODER_REFERENCE_PICTURE_DESCRIPTOR_H264> pReferenceFramesReconPictureDescriptors;
+      D3D12_VIDEO_ENCODER_RECONSTRUCTED_PICTURE ReconstructedPicTexture;
+   };
+
+   d3d12_video_dpb_storage_manager_interface &m_rDPBStorageManager;
+
+   current_frame_references_data m_CurrentFrameReferencesData;
+
+   bool m_gopHasInterFrames = false;
+
+   bool m_isCurrentFrameUsedAsReference = false;
+   D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA_H264 m_curFrameState = {};
+};
+
+#endif
diff --git a/src/gallium/drivers/d3d12/meson.build b/src/gallium/drivers/d3d12/meson.build
index 78538956071..d95045aa6f8 100644
--- a/src/gallium/drivers/d3d12/meson.build
+++ b/src/gallium/drivers/d3d12/meson.build
@@ -51,6 +51,12 @@ files_libd3d12 = files(
   'd3d12_video_dec_references_mgr.cpp',
   'd3d12_video_dec_h264.cpp',
   'd3d12_video_buffer.cpp',
+  'd3d12_video_enc.cpp',
+  'd3d12_video_enc_h264.cpp',
+  'd3d12_video_encoder_references_manager_h264.cpp',
+  'd3d12_video_encoder_nalu_writer_h264.cpp',
+  'd3d12_video_encoder_bitstream_builder_h264.cpp',
+  'd3d12_video_encoder_bitstream.cpp',
   'd3d12_video_texture_array_dpb_manager.cpp',
   'd3d12_video_array_of_textures_dpb_manager.cpp',
 )