d3d12: Video Decode - Implement get_decoder_fence and async queueing

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23969>
This commit is contained in:
Sil Vilerino 2023-06-30 15:44:36 -04:00 committed by Marge Bot
parent fb1783616e
commit af0b4eacab
2 changed files with 321 additions and 158 deletions

View file

@ -40,6 +40,12 @@
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_video.h" #include "util/u_video.h"
// Returns the slot of the in-flight resources pool to use for the current
// frame: the decoder's monotonically increasing fence value taken modulo the
// pool depth, i.e. slots are reused round-robin every
// D3D12_VIDEO_DEC_ASYNC_DEPTH submissions. Callers must ensure the GPU work
// previously recorded against the returned slot has completed before reuse
// (see the sync in d3d12_video_decoder_begin_frame).
uint64_t
d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec)
{
return pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH;
}
struct pipe_video_codec * struct pipe_video_codec *
d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec) d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
{ {
@ -51,6 +57,8 @@ d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video
// Not using new doesn't call ctor and the initializations in the class declaration are lost // Not using new doesn't call ctor and the initializations in the class declaration are lost
struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder; struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;
pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH, { 0 });
pD3D12Dec->base = *codec; pD3D12Dec->base = *codec;
pD3D12Dec->m_screen = context->screen; pD3D12Dec->m_screen = context->screen;
@ -137,9 +145,14 @@ d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
return; return;
} }
d3d12_video_decoder_flush(codec); // Flush pending work before destroying. // Flush pending work before destroying.
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
uint64_t curBatchFence = pD3D12Dec->m_fenceValue;
if (pD3D12Dec->m_needsGPUFlush)
{
d3d12_video_decoder_flush(codec);
d3d12_video_decoder_sync_completion(codec, curBatchFence, OS_TIMEOUT_INFINITE);
}
// //
// Destroys a decoder // Destroys a decoder
@ -173,6 +186,26 @@ d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
// d3d12_video_decoder_decode_bitstream // d3d12_video_decoder_decode_bitstream
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
assert(pD3D12Dec); assert(pD3D12Dec);
///
/// Wait here to make sure the next in flight resource set is empty before using it
///
uint64_t fenceValueToWaitOn = static_cast<uint64_t>(std::max(static_cast<int64_t>(0l), static_cast<int64_t>(pD3D12Dec->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_DEC_ASYNC_DEPTH) ));
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource sets with previous work with fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
ASSERTED bool wait_res = d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
assert(wait_res);
HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get());
if (FAILED(hr)) {
debug_printf(
"[d3d12_video_decoder] resetting ID3D12GraphicsCommandList failed with HR %x\n",
hr);
assert(false);
}
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n", debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n",
pD3D12Dec->m_fenceValue); pD3D12Dec->m_fenceValue);
} }
@ -275,14 +308,14 @@ d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec,
} }
// Bytes of data pre-staged before this decode_frame call // Bytes of data pre-staged before this decode_frame call
size_t preStagedDataSize = pD3D12Dec->m_stagingDecodeBitstream.size(); size_t preStagedDataSize = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.size();
// Extend the staging buffer size, as decode_frame can be called several times before end_frame // Extend the staging buffer size, as decode_frame can be called several times before end_frame
pD3D12Dec->m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
// Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new
// buffers will be appended // buffers will be appended
uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_stagingDecodeBitstream.data() + preStagedDataSize; uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.data() + preStagedDataSize;
// Append new data at the end. // Append new data at the end.
size_t dstOffset = 0u; size_t dstOffset = 0u;
@ -363,28 +396,28 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
/// ///
d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer); d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer);
assert(pD3D12Dec->m_picParamsBuffer.size() > 0); assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.size() > 0);
/// ///
/// Prepare Slice control buffers before clearing staging buffer /// Prepare Slice control buffers before clearing staging buffer
/// ///
assert(pD3D12Dec->m_stagingDecodeBitstream.size() > 0); // Make sure the staging wasn't cleared yet in end_frame assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.size() > 0); // Make sure the staging wasn't cleared yet in end_frame
d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture); d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
assert(pD3D12Dec->m_SliceControlBuffer.size() > 0); assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.size() > 0);
/// ///
/// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
/// ///
uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_stagingDecodeBitstream.size(); uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.size();
uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_stagingDecodeBitstream.data(); uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.data();
// Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory // Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory
if (pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) { if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) { if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on " debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
"d3d12_video_decoder_create_staging_bitstream_buffer\n"); "d3d12_video_decoder_create_staging_bitstream_buffer\n");
debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n", debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
pD3D12Dec->m_fenceValue); pD3D12Dec->m_fenceValue);
assert(false); assert(false);
return; return;
@ -392,41 +425,33 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
} }
// Upload frame bitstream CPU data to ID3D12Resource buffer // Upload frame bitstream CPU data to ID3D12Resource buffer
pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferPayloadSize =
sliceDataStagingBufferSize; // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize. sliceDataStagingBufferSize; // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
assert(pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize <= assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferPayloadSize <=
pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferAllocatedSize);
/* One-shot transfer operation with data supplied in a user /* One-shot transfer operation with data supplied in a user
* pointer. * pointer.
*/ */
pipe_resource *pPipeCompressedBufferObj = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].pPipeCompressedBufferObj =
d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get()); d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Get());
assert(pPipeCompressedBufferObj); assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].pPipeCompressedBufferObj);
pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context, // context pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context, // context
pPipeCompressedBufferObj, // dst buffer pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].pPipeCompressedBufferObj, // dst buffer
PIPE_MAP_WRITE, // usage PIPE_MAP_x PIPE_MAP_WRITE, // usage PIPE_MAP_x
0, // offset 0, // offset
sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize, // size sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize, // size
sliceDataStagingBufferPtr // data sliceDataStagingBufferPtr // data
); );
// Flush buffer_subdata batch and wait on this CPU thread for GPU work completion // Flush buffer_subdata batch
// before deleting the source CPU buffer below // before deleting the source CPU buffer below
struct pipe_fence_handle *pUploadGPUCompletionFence = NULL;
pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
&pUploadGPUCompletionFence,
PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
assert(pUploadGPUCompletionFence);
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
"buffer_subdata to upload compressed bitstream.\n");
pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, pUploadGPUCompletionFence, OS_TIMEOUT_INFINITE);
pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pUploadGPUCompletionFence, NULL);
pipe_resource_reference(&pPipeCompressedBufferObj, NULL);
// [After buffer_subdata GPU work is finished] Clear CPU staging buffer now that end_frame is called and was uploaded pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
// to GPU for DecodeFrame call. &pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence,
pD3D12Dec->m_stagingDecodeBitstream.resize(0); PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence);
// To be waited on GPU fence before flushing current frame DecodeFrame to GPU
/// ///
/// Proceed to record the GPU Decode commands /// Proceed to record the GPU Decode commands
@ -442,14 +467,14 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
// Translate input D3D12 structure // Translate input D3D12 structure
D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {}; D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {};
d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get(); d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Get();
d3d12InputArguments.CompressedBitstream.Offset = 0u; d3d12InputArguments.CompressedBitstream.Offset = 0u;
ASSERTED constexpr uint64_t d3d12BitstreamOffsetAlignment = ASSERTED constexpr uint64_t d3d12BitstreamOffsetAlignment =
128u; // specified in 128u; // specified in
// https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier
assert((d3d12InputArguments.CompressedBitstream.Offset == 0) || assert((d3d12InputArguments.CompressedBitstream.Offset == 0) ||
((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0)); ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0));
d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize; d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferPayloadSize;
D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer, CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
@ -488,7 +513,7 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
requestedConversionArguments)) { requestedConversionArguments)) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on " debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
"d3d12_video_decoder_prepare_for_decode_frame\n"); "d3d12_video_decoder_prepare_for_decode_frame\n");
debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n", debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
pD3D12Dec->m_fenceValue); pD3D12Dec->m_fenceValue);
assert(false); assert(false);
return; return;
@ -502,25 +527,25 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
1u; // Only the codec data received from the above layer with picture params 1u; // Only the codec data received from the above layer with picture params
d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
static_cast<uint32_t>(pD3D12Dec->m_picParamsBuffer.size()), static_cast<uint32_t>(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.size()),
pD3D12Dec->m_picParamsBuffer.data(), pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.data(),
}; };
if (pD3D12Dec->m_SliceControlBuffer.size() > 0) { if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.size() > 0) {
d3d12InputArguments.NumFrameArguments++; d3d12InputArguments.NumFrameArguments++;
d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL, D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL,
static_cast<uint32_t>(pD3D12Dec->m_SliceControlBuffer.size()), static_cast<uint32_t>(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.size()),
pD3D12Dec->m_SliceControlBuffer.data(), pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.data(),
}; };
} }
if (pD3D12Dec->qp_matrix_frame_argument_enabled && (pD3D12Dec->m_InverseQuantMatrixBuffer.size() > 0)) { if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled && (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.size() > 0)) {
d3d12InputArguments.NumFrameArguments++; d3d12InputArguments.NumFrameArguments++;
d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
static_cast<uint32_t>(pD3D12Dec->m_InverseQuantMatrixBuffer.size()), static_cast<uint32_t>(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.size()),
pD3D12Dec->m_InverseQuantMatrixBuffer.data(), pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.data(),
}; };
} }
@ -608,13 +633,27 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n", debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n",
pD3D12Dec->m_fenceValue); pD3D12Dec->m_fenceValue);
// Save extra references of Decoder, DecoderHeap and DPB allocations in case
// there's a reconfiguration that triggers the construction of new objects
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spDecoder = pD3D12Dec->m_spVideoDecoder;
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spDecoderHeap = pD3D12Dec->m_spVideoDecoderHeap;
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_References = pD3D12Dec->m_spDPBManager;
/// ///
/// Flush work to the GPU and blocking wait until decode finishes /// Flush work to the GPU
/// ///
pD3D12Dec->m_needsGPUFlush = true; pD3D12Dec->m_needsGPUFlush = true;
d3d12_video_decoder_flush(codec); d3d12_video_decoder_flush(codec);
// Call to d3d12_video_decoder_flush increases m_FenceValue
uint64_t inflightIndexBeforeFlush = (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH;
if (!pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation())
{
// No need to copy, the output surface fence is merely the decode queue fence
*picture->fence = (pipe_fence_handle*) &pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData;
}
else
{
/// ///
/// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation() /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
/// We cannot use the standalone video buffer allocation directly and we must use instead /// We cannot use the standalone video buffer allocation directly and we must use instead
@ -631,8 +670,13 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D); d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D);
assert(pPipeSrc); assert(pPipeSrc);
// Copy all format subresources/texture planes // GPU wait on the graphics context which will do the copy until the decode finishes
pD3D12Screen->cmdqueue->Wait(
pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.cmdqueue_fence,
pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.value
);
// Copy all format subresources/texture planes
for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
assert(d3d12OutputArguments.OutputSubresource < INT16_MAX); assert(d3d12OutputArguments.OutputSubresource < INT16_MAX);
struct pipe_box box = { 0, struct pipe_box box = { 0,
@ -653,22 +697,12 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
0, // src level 0, // src level
&box); &box);
} }
// Flush resource_copy_region batch and wait on this CPU thread for GPU work completion // Flush resource_copy_region batch
struct pipe_fence_handle *completion_fence = NULL; // The output surface fence is the graphics queue that will signal after the copy ends
pD3D12Dec->base.context->flush(pD3D12Dec->base.context, pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
&completion_fence, picture->fence,
PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
assert(completion_fence);
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
"resource_copy_region on decoded frame.\n");
pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, completion_fence, OS_TIMEOUT_INFINITE);
pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &completion_fence, NULL);
pipe_resource_reference(&pPipeSrc, NULL);
} }
// We do not use the async fence for now but set it to
// NULL to avoid uninitialized memory in VA frontend
*picture->fence = NULL;
} }
/** /**
@ -678,16 +712,15 @@ int d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec *codec,
struct pipe_fence_handle *fence, struct pipe_fence_handle *fence,
uint64_t timeout) uint64_t timeout)
{ {
/* No need to wait for anything, we're already flushing struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
and waiting in d3d12_video_decoder_end_frame */ assert(fenceValueToWaitOn);
// We set NULL in d3d12_video_decoder_end_frame ASSERTED bool wait_res = d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn->value, timeout);
assert(fence == NULL);
// Return semantics based on p_video_codec interface // Return semantics based on p_video_codec interface
// ret == 0 -> Decode in progress // ret == 0 -> Decode in progress
// ret != 0 -> Decode completed // ret != 0 -> Decode completed
return 1; return wait_res ? 1 : 0;
} }
/** /**
@ -717,9 +750,6 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec)
goto flush_fail; goto flush_fail;
} }
// Close and execute command list and wait for idle on CPU blocking
// this method before resetting list and allocator for next submission.
if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) { if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(), pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
pD3D12Dec->m_transitionsBeforeCloseCmdList.data()); pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
@ -733,28 +763,10 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec)
} }
ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() }; ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
struct d3d12_fence* pUploadBitstreamFence = d3d12_fence(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence);
pD3D12Dec->m_spDecodeCommandQueue->Wait(pUploadBitstreamFence->cmdqueue_fence, pUploadBitstreamFence->value);
pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists); pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue); pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);
pD3D12Dec->m_spFence->SetEventOnCompletion(pD3D12Dec->m_fenceValue, nullptr);
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - ExecuteCommandLists finished on signal with "
"fenceValue: %d\n",
pD3D12Dec->m_fenceValue);
hr = pD3D12Dec->m_spCommandAllocator->Reset();
if (FAILED(hr)) {
debug_printf(
"[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12CommandAllocator failed with HR %x\n",
hr);
goto flush_fail;
}
hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_spCommandAllocator.Get());
if (FAILED(hr)) {
debug_printf(
"[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n",
hr);
goto flush_fail;
}
// Validate device was not removed // Validate device was not removed
hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason(); hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
@ -766,9 +778,13 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec)
goto flush_fail; goto flush_fail;
} }
debug_printf( // Set async fence info
"[d3d12_video_decoder] d3d12_video_decoder_flush - GPU signaled execution finalized for fenceValue: %d\n", memset(&pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData,
pD3D12Dec->m_fenceValue); 0,
sizeof(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData));
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData.value = pD3D12Dec->m_fenceValue;
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData.cmdqueue_fence = pD3D12Dec->m_spFence.Get();
pD3D12Dec->m_fenceValue++; pD3D12Dec->m_fenceValue++;
pD3D12Dec->m_needsGPUFlush = false; pD3D12Dec->m_needsGPUFlush = false;
@ -804,20 +820,31 @@ d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Scre
return false; return false;
} }
hr = pD3D12Screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, for (auto& inputResource : pD3D12Dec->m_inflightResourcesPool)
IID_PPV_ARGS(pD3D12Dec->m_spCommandAllocator.GetAddressOf())); {
if (FAILED(hr)) { hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator(
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to " D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
"CreateCommandAllocator failed with HR %x\n", IID_PPV_ARGS(inputResource.m_spCommandAllocator.GetAddressOf()));
hr); if (FAILED(hr)) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
"CreateCommandAllocator failed with HR %x\n",
hr);
return false;
}
}
ComPtr<ID3D12Device4> spD3D12Device4;
if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) {
debug_printf(
"[d3d12_video_decoder] d3d12_video_decoder_create_decoder - D3D12 Device has no ID3D12Device4 support\n");
return false; return false;
} }
hr = pD3D12Screen->dev->CreateCommandList(0, hr = spD3D12Device4->CreateCommandList1(0,
D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
pD3D12Dec->m_spCommandAllocator.Get(), D3D12_COMMAND_LIST_FLAG_NONE,
nullptr, IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
if (FAILED(hr)) { if (FAILED(hr)) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList " debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
@ -922,8 +949,8 @@ d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *p
{ {
assert(pD3D12Dec->m_spD3D12VideoDevice); assert(pD3D12Dec->m_spD3D12VideoDevice);
if (pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get() != nullptr) { if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Reset(); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Reset();
} }
auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask); auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask);
@ -934,7 +961,7 @@ d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *p
&descResource, &descResource,
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COMMON,
nullptr, nullptr,
IID_PPV_ARGS(pD3D12Dec->m_curFrameCompressedBitstreamBuffer.GetAddressOf())); IID_PPV_ARGS(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
if (FAILED(hr)) { if (FAILED(hr)) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - " debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - "
"CreateCommittedResource failed with HR %x\n", "CreateCommittedResource failed with HR %x\n",
@ -942,7 +969,7 @@ d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *p
return false; return false;
} }
pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize; pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
return true; return true;
} }
@ -1252,7 +1279,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile); d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile);
ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture); ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource); D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);
pD3D12Dec->qp_matrix_frame_argument_enabled = false; pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
switch (profileType) { switch (profileType) {
case d3d12_video_decode_profile_type_h264: case d3d12_video_decode_profile_type_h264:
{ {
@ -1273,7 +1300,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
DXVA_Qmatrix_H264 dxvaQmatrixH264 = {}; DXVA_Qmatrix_H264 dxvaQmatrixH264 = {};
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture, d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture,
dxvaQmatrixH264); dxvaQmatrixH264);
pD3D12Dec->qp_matrix_frame_argument_enabled = true; // We don't have a way of knowing from the pipe params so send always pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = true; // We don't have a way of knowing from the pipe params so send always
d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize); d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize);
} break; } break;
@ -1292,10 +1319,10 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_HEVC); size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_HEVC);
DXVA_Qmatrix_HEVC dxvaQmatrixHEVC = {}; DXVA_Qmatrix_HEVC dxvaQmatrixHEVC = {};
pD3D12Dec->qp_matrix_frame_argument_enabled = false; pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc((pipe_h265_picture_desc *) picture, d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc((pipe_h265_picture_desc *) picture,
dxvaQmatrixHEVC, dxvaQmatrixHEVC,
pD3D12Dec->qp_matrix_frame_argument_enabled); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled);
d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixHEVC, dxvaQMatrixBufferSize); d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixHEVC, dxvaQMatrixBufferSize);
} break; } break;
case d3d12_video_decode_profile_type_av1: case d3d12_video_decode_profile_type_av1:
@ -1310,7 +1337,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
&dxvaPicParamsAV1, &dxvaPicParamsAV1,
dxvaPicParamsBufferSize); dxvaPicParamsBufferSize);
pD3D12Dec->qp_matrix_frame_argument_enabled = false; pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
} break; } break;
case d3d12_video_decode_profile_type_vp9: case d3d12_video_decode_profile_type_vp9:
{ {
@ -1324,7 +1351,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
&dxvaPicParamsVP9, &dxvaPicParamsVP9,
dxvaPicParamsBufferSize); dxvaPicParamsBufferSize);
pD3D12Dec->qp_matrix_frame_argument_enabled = false; pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
} break; } break;
default: default:
{ {
@ -1344,20 +1371,20 @@ d3d12_video_decoder_prepare_dxva_slices_control(
switch (profileType) { switch (profileType) {
case d3d12_video_decode_profile_type_h264: case d3d12_video_decode_profile_type_h264:
{ {
d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_h264_picture_desc*) picture); d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_h264_picture_desc*) picture);
} break; } break;
case d3d12_video_decode_profile_type_hevc: case d3d12_video_decode_profile_type_hevc:
{ {
d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_h265_picture_desc*) picture); d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_h265_picture_desc*) picture);
} break; } break;
case d3d12_video_decode_profile_type_av1: case d3d12_video_decode_profile_type_av1:
{ {
d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_av1_picture_desc*) picture); d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_av1_picture_desc*) picture);
} break; } break;
case d3d12_video_decode_profile_type_vp9: case d3d12_video_decode_profile_type_vp9:
{ {
d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_vp9_picture_desc*) picture); d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_vp9_picture_desc*) picture);
} break; } break;
default: default:
@ -1372,12 +1399,12 @@ d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_deco
void *pDXVAStruct, void *pDXVAStruct,
uint64_t DXVAStructSize) uint64_t DXVAStructSize)
{ {
if (pD3D12Dec->m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) { if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
pD3D12Dec->m_InverseQuantMatrixBuffer.reserve(DXVAStructSize); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
} }
pD3D12Dec->m_InverseQuantMatrixBuffer.resize(DXVAStructSize); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
memcpy(pD3D12Dec->m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize); memcpy(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
} }
void void
@ -1385,12 +1412,12 @@ d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_
void *pDXVAStruct, void *pDXVAStruct,
uint64_t DXVAStructSize) uint64_t DXVAStructSize)
{ {
if (pD3D12Dec->m_picParamsBuffer.capacity() < DXVAStructSize) { if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.capacity() < DXVAStructSize) {
pD3D12Dec->m_picParamsBuffer.reserve(DXVAStructSize); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.reserve(DXVAStructSize);
} }
pD3D12Dec->m_picParamsBuffer.resize(DXVAStructSize); pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.resize(DXVAStructSize);
memcpy(pD3D12Dec->m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize); memcpy(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
} }
bool bool
@ -1506,3 +1533,102 @@ d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType,
} break; } break;
} }
} }
/**
 * Waits (up to timeout_ns) until pD3D12Dec->m_spFence reaches fenceValueToWaitOn.
 *
 * Returns true if the fence already completed or the wait finished within the
 * timeout; returns false (after asserting) if the completion event could not
 * be registered. Note a timed-out d3d12_fence_wait_event is reported through
 * the boolean result as well.
 */
bool
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns)
{
   bool wait_result = true;
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   HRESULT hr = S_OK;
   uint64_t completedValue = pD3D12Dec->m_spFence->GetCompletedValue();

   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64 ") to finish with "
                "fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
                timeout_ns, fenceValueToWaitOn, completedValue);

   if(completedValue < fenceValueToWaitOn) {

      HANDLE event = { };
      int event_fd = 0;
      event = d3d12_fence_create_event(&event_fd);

      hr = pD3D12Dec->m_spFence->SetEventOnCompletion(fenceValueToWaitOn, event);
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - SetEventOnCompletion for fenceValue %" PRIu64 " failed with HR %x\n",
            fenceValueToWaitOn, hr);
         // Don't leak the event created above on the failure path.
         d3d12_fence_close_event(event, event_fd);
         goto ensure_fence_finished_fail;
      }

      wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
      d3d12_fence_close_event(event, event_fd);

      // Re-query the completed value so the trace reflects the post-wait state.
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting on fence to be done with "
                   "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
                   fenceValueToWaitOn,
                   pD3D12Dec->m_spFence->GetCompletedValue());
   } else {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Fence already done with "
                   "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
                   fenceValueToWaitOn,
                   completedValue);
   }
   return wait_result;

ensure_fence_finished_fail:
   // Log the correct function name (was mislabeled as sync_completion).
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
   assert(false);
   return false;
}
bool
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns)
{
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
assert(pD3D12Dec);
assert(pD3D12Dec->m_spD3D12VideoDevice);
assert(pD3D12Dec->m_spDecodeCommandQueue);
HRESULT hr = S_OK;
ASSERTED bool wait_result = d3d12_video_decoder_ensure_fence_finished(codec, fenceValueToWaitOn, timeout_ns);
assert(wait_result);
// Release references granted on end_frame for this inflight operations
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoder.Reset();
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoderHeap.Reset();
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_References.reset();
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_stagingDecodeBitstream.resize(0);
pipe_resource_reference(&pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].pPipeCompressedBufferObj, NULL);
struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
assert(pD3D12Screen);
pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence, NULL);
hr = pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spCommandAllocator->Reset();
if(FAILED(hr)) {
debug_printf("failed with %x.\n", hr);
goto sync_with_token_fail;
}
// Validate device was not removed
hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
if (hr != S_OK) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion"
" - D3D12Device was removed AFTER d3d12_video_decoder_ensure_fence_finished "
"execution with HR %x, but wasn't before.\n",
hr);
goto sync_with_token_fail;
}
debug_printf(
"[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
return wait_result;
sync_with_token_fail:
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
assert(false);
return false;
}

View file

@ -92,6 +92,9 @@ int d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec *codec,
/// d3d12_video_decoder functions starts /// d3d12_video_decoder functions starts
/// ///
// We need enough to so next item in pipeline doesn't ask for a fence value we lost
const uint64_t D3D12_VIDEO_DEC_ASYNC_DEPTH = 36;
struct d3d12_video_decoder_reference_poc_entry { struct d3d12_video_decoder_reference_poc_entry {
uint8_t refpicset_index; uint8_t refpicset_index;
int32_t poc_value; int32_t poc_value;
@ -117,7 +120,6 @@ struct d3d12_video_decoder
ComPtr<ID3D12VideoDecoder> m_spVideoDecoder; ComPtr<ID3D12VideoDecoder> m_spVideoDecoder;
ComPtr<ID3D12VideoDecoderHeap> m_spVideoDecoderHeap; ComPtr<ID3D12VideoDecoderHeap> m_spVideoDecoderHeap;
ComPtr<ID3D12CommandQueue> m_spDecodeCommandQueue; ComPtr<ID3D12CommandQueue> m_spDecodeCommandQueue;
ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;
ComPtr<ID3D12VideoDecodeCommandList1> m_spDecodeCommandList; ComPtr<ID3D12VideoDecodeCommandList1> m_spDecodeCommandList;
std::vector<D3D12_RESOURCE_BARRIER> m_transitionsBeforeCloseCmdList; std::vector<D3D12_RESOURCE_BARRIER> m_transitionsBeforeCloseCmdList;
@ -138,43 +140,66 @@ struct d3d12_video_decoder
/// ///
// Tracks DPB and reference picture textures // Tracks DPB and reference picture textures
std::unique_ptr<d3d12_video_decoder_references_manager> m_spDPBManager; std::shared_ptr<d3d12_video_decoder_references_manager> m_spDPBManager;
static const uint64_t m_InitialCompBitstreamGPUBufferSize = (1024 /*1K*/ * 1024 /*1MB*/) * 8 /*8 MB*/; // 8MB
   // Per-slot bundle of resources that must stay alive while one decode
   // batch is in flight on the GPU. One instance exists per entry of
   // m_inflightResourcesPool (D3D12_VIDEO_DEC_ASYNC_DEPTH entries), indexed
   // by d3d12_video_decoder_pool_current_index(); a slot is recycled in
   // d3d12_video_decoder_sync_completion once its fence value completes.
   struct InFlightDecodeResources
   {
      // Fence signaled when the bitstream upload for this slot finished on the GPU.
      struct pipe_fence_handle *m_pBitstreamUploadGPUCompletionFence;

      // Fence data handed back to callers (e.g. get_decoder_fence) for this slot.
      struct d3d12_fence m_FenceData;

      // In case of reconfigurations that trigger creation of new
      // decoder or decoderheap or reference frames allocations
      // we need to keep a reference alive to the ones that
      // are currently in-flight
      ComPtr<ID3D12VideoDecoder> m_spDecoder;
      ComPtr<ID3D12VideoDecoderHeap> m_spDecoderHeap;

      // Tracks DPB and reference picture textures
      std::shared_ptr<d3d12_video_decoder_references_manager> m_References;

      // Command allocator dedicated to this slot; reset only after the slot's
      // fence value has completed on the GPU.
      ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;

      // Holds the input bitstream buffer while it's being constructed in decode_bitstream calls
      std::vector<uint8_t> m_stagingDecodeBitstream;

      // Holds the input bitstream buffer in GPU video memory
      ComPtr<ID3D12Resource> m_curFrameCompressedBitstreamBuffer;

      // Actual number of allocated bytes available in the buffer (after
      // m_curFrameCompressedBitstreamBufferPayloadSize might be garbage)
      uint64_t m_curFrameCompressedBitstreamBufferAllocatedSize =0;
      uint64_t m_curFrameCompressedBitstreamBufferPayloadSize = 0u; // Actual number of bytes of valid data

      // Holds a buffer for the DXVA struct layout of the picture params of the current frame
      std::vector<uint8_t> m_picParamsBuffer; // size() has the byte size of the currently held picparams ; capacity()
                                              // has the underlying container allocation size

      // Set for each frame indicating whether to send VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX
      bool qp_matrix_frame_argument_enabled = false;

      // Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX of the
      // current frame m_InverseQuantMatrixBuffer.size() == 0 means no quantization matrix buffer is set for current frame
      std::vector<uint8_t> m_InverseQuantMatrixBuffer; // size() has the byte size of the currently held
                                                       // VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX ;
                                                       // capacity() has the underlying container allocation size

      // Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL of the current frame
      // m_SliceControlBuffer.size() == 0 means no slice control buffer is set for current frame
      std::vector<uint8_t>
         m_SliceControlBuffer; // size() has the byte size of the currently held VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL ;
                               // capacity() has the underlying container allocation size

      // Reference held on the upper-layer compressed bitstream pipe_resource
      // until this slot's GPU work completes; released in sync_completion.
      pipe_resource* pPipeCompressedBufferObj = NULL;
   };
std::vector<InFlightDecodeResources> m_inflightResourcesPool;
// Holds pointers to current decode output target texture and reference textures from upper layer // Holds pointers to current decode output target texture and reference textures from upper layer
struct pipe_video_buffer *m_pCurrentDecodeTarget; struct pipe_video_buffer *m_pCurrentDecodeTarget;
struct pipe_video_buffer **m_pCurrentReferenceTargets; struct pipe_video_buffer **m_pCurrentReferenceTargets;
// Holds the input bitstream buffer while it's being constructed in decode_bitstream calls
std::vector<uint8_t> m_stagingDecodeBitstream;
const uint64_t m_InitialCompBitstreamGPUBufferSize = (1024 /*1K*/ * 1024 /*1MB*/) * 8 /*8 MB*/; // 8MB
// Holds the input bitstream buffer in GPU video memory
ComPtr<ID3D12Resource> m_curFrameCompressedBitstreamBuffer;
uint64_t m_curFrameCompressedBitstreamBufferAllocatedSize =
m_InitialCompBitstreamGPUBufferSize; // Actual number of allocated bytes available in the buffer (after
// m_curFrameCompressedBitstreamBufferPayloadSize might be garbage)
uint64_t m_curFrameCompressedBitstreamBufferPayloadSize = 0u; // Actual number of bytes of valid data
// Holds a buffer for the DXVA struct layout of the picture params of the current frame
std::vector<uint8_t> m_picParamsBuffer; // size() has the byte size of the currently held picparams ; capacity()
// has the underlying container allocation size
// Set for each frame indicating whether to send VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX
bool qp_matrix_frame_argument_enabled = false;
// Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX of the
// current frame m_InverseQuantMatrixBuffer.size() == 0 means no quantization matrix buffer is set for current frame
std::vector<uint8_t> m_InverseQuantMatrixBuffer; // size() has the byte size of the currently held
// VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX ;
// capacity() has the underlying container allocation size
// Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL of the current frame
// m_SliceControlBuffer.size() == 0 means no quantization matrix buffer is set for current frame
std::vector<uint8_t>
m_SliceControlBuffer; // size() has the byte size of the currently held VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL ;
// capacity() has the underlying container allocation size
// Indicates if GPU commands have not been flushed and are pending. // Indicates if GPU commands have not been flushed and are pending.
bool m_needsGPUFlush = false; bool m_needsGPUFlush = false;
@ -220,11 +245,17 @@ void
d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(struct d3d12_video_decoder *codec, d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(struct d3d12_video_decoder *codec,
struct pipe_picture_desc * picture, struct pipe_picture_desc * picture,
struct d3d12_video_buffer * pD3D12VideoBuffer); struct d3d12_video_buffer * pD3D12VideoBuffer);
uint64_t
d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec);
template <typename T> template <typename T>
T * T *
d3d12_video_decoder_get_current_dxva_picparams(struct d3d12_video_decoder *codec) d3d12_video_decoder_get_current_dxva_picparams(struct d3d12_video_decoder *codec)
{ {
return reinterpret_cast<T *>(codec->m_picParamsBuffer.data()); struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
assert(pD3D12Dec);
return reinterpret_cast<T *>(codec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.data());
} }
bool bool
d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport, d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
@ -244,6 +275,12 @@ d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_deco
void void
d3d12_video_decoder_prepare_dxva_slices_control(struct d3d12_video_decoder *pD3D12Dec, struct pipe_picture_desc *picture); d3d12_video_decoder_prepare_dxva_slices_control(struct d3d12_video_decoder *pD3D12Dec, struct pipe_picture_desc *picture);
bool
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
bool
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
/// ///
/// d3d12_video_decoder functions ends /// d3d12_video_decoder functions ends
/// ///