mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 04:10:09 +01:00
d3d12: Video Decode - Implement get_decoder_fence and async queing
Reviewed-by: Jesse Natalie <jenatali@microsoft.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23969>
This commit is contained in:
parent
fb1783616e
commit
af0b4eacab
2 changed files with 321 additions and 158 deletions
|
|
@ -40,6 +40,12 @@
|
|||
#include "util/u_memory.h"
|
||||
#include "util/u_video.h"
|
||||
|
||||
uint64_t
|
||||
d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec)
|
||||
{
|
||||
return pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH;
|
||||
}
|
||||
|
||||
struct pipe_video_codec *
|
||||
d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
|
||||
{
|
||||
|
|
@ -51,6 +57,8 @@ d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video
|
|||
// Not using new doesn't call ctor and the initializations in the class declaration are lost
|
||||
struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;
|
||||
|
||||
pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH, { 0 });
|
||||
|
||||
pD3D12Dec->base = *codec;
|
||||
pD3D12Dec->m_screen = context->screen;
|
||||
|
||||
|
|
@ -137,10 +145,15 @@ d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
|
|||
return;
|
||||
}
|
||||
|
||||
d3d12_video_decoder_flush(codec); // Flush pending work before destroying.
|
||||
|
||||
// Flush pending work before destroying.
|
||||
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
|
||||
|
||||
uint64_t curBatchFence = pD3D12Dec->m_fenceValue;
|
||||
if (pD3D12Dec->m_needsGPUFlush)
|
||||
{
|
||||
d3d12_video_decoder_flush(codec);
|
||||
d3d12_video_decoder_sync_completion(codec, curBatchFence, OS_TIMEOUT_INFINITE);
|
||||
}
|
||||
|
||||
//
|
||||
// Destroys a decoder
|
||||
// Call destroy_XX for applicable XX nested member types before deallocating
|
||||
|
|
@ -173,6 +186,26 @@ d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
|
|||
// d3d12_video_decoder_decode_bitstream
|
||||
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
|
||||
assert(pD3D12Dec);
|
||||
|
||||
///
|
||||
/// Wait here to make sure the next in flight resource set is empty before using it
|
||||
///
|
||||
uint64_t fenceValueToWaitOn = static_cast<uint64_t>(std::max(static_cast<int64_t>(0l), static_cast<int64_t>(pD3D12Dec->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_DEC_ASYNC_DEPTH) ));
|
||||
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource sets with previous work with fenceValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn);
|
||||
|
||||
ASSERTED bool wait_res = d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
|
||||
assert(wait_res);
|
||||
|
||||
HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get());
|
||||
if (FAILED(hr)) {
|
||||
debug_printf(
|
||||
"[d3d12_video_decoder] resetting ID3D12GraphicsCommandList failed with HR %x\n",
|
||||
hr);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n",
|
||||
pD3D12Dec->m_fenceValue);
|
||||
}
|
||||
|
|
@ -275,14 +308,14 @@ d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec,
|
|||
}
|
||||
|
||||
// Bytes of data pre-staged before this decode_frame call
|
||||
size_t preStagedDataSize = pD3D12Dec->m_stagingDecodeBitstream.size();
|
||||
size_t preStagedDataSize = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.size();
|
||||
|
||||
// Extend the staging buffer size, as decode_frame can be called several times before end_frame
|
||||
pD3D12Dec->m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
|
||||
|
||||
// Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new
|
||||
// buffers will be appended
|
||||
uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_stagingDecodeBitstream.data() + preStagedDataSize;
|
||||
uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.data() + preStagedDataSize;
|
||||
|
||||
// Append new data at the end.
|
||||
size_t dstOffset = 0u;
|
||||
|
|
@ -363,28 +396,28 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
///
|
||||
|
||||
d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer);
|
||||
assert(pD3D12Dec->m_picParamsBuffer.size() > 0);
|
||||
assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.size() > 0);
|
||||
|
||||
///
|
||||
/// Prepare Slice control buffers before clearing staging buffer
|
||||
///
|
||||
assert(pD3D12Dec->m_stagingDecodeBitstream.size() > 0); // Make sure the staging wasn't cleared yet in end_frame
|
||||
assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.size() > 0); // Make sure the staging wasn't cleared yet in end_frame
|
||||
d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
|
||||
assert(pD3D12Dec->m_SliceControlBuffer.size() > 0);
|
||||
assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.size() > 0);
|
||||
|
||||
///
|
||||
/// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
|
||||
///
|
||||
|
||||
uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_stagingDecodeBitstream.size();
|
||||
uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_stagingDecodeBitstream.data();
|
||||
uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.size();
|
||||
uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_stagingDecodeBitstream.data();
|
||||
|
||||
// Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory
|
||||
if (pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
|
||||
if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
|
||||
if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
|
||||
"d3d12_video_decoder_create_staging_bitstream_buffer\n");
|
||||
debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
|
||||
pD3D12Dec->m_fenceValue);
|
||||
assert(false);
|
||||
return;
|
||||
|
|
@ -392,41 +425,33 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
}
|
||||
|
||||
// Upload frame bitstream CPU data to ID3D12Resource buffer
|
||||
pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize =
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferPayloadSize =
|
||||
sliceDataStagingBufferSize; // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
|
||||
assert(pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize <=
|
||||
pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize);
|
||||
assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferPayloadSize <=
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferAllocatedSize);
|
||||
|
||||
/* One-shot transfer operation with data supplied in a user
|
||||
* pointer.
|
||||
*/
|
||||
pipe_resource *pPipeCompressedBufferObj =
|
||||
d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get());
|
||||
assert(pPipeCompressedBufferObj);
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].pPipeCompressedBufferObj =
|
||||
d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Get());
|
||||
assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].pPipeCompressedBufferObj);
|
||||
pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context, // context
|
||||
pPipeCompressedBufferObj, // dst buffer
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].pPipeCompressedBufferObj, // dst buffer
|
||||
PIPE_MAP_WRITE, // usage PIPE_MAP_x
|
||||
0, // offset
|
||||
sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize, // size
|
||||
sliceDataStagingBufferPtr // data
|
||||
);
|
||||
|
||||
// Flush buffer_subdata batch and wait on this CPU thread for GPU work completion
|
||||
// Flush buffer_subdata batch
|
||||
// before deleting the source CPU buffer below
|
||||
struct pipe_fence_handle *pUploadGPUCompletionFence = NULL;
|
||||
|
||||
pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
|
||||
&pUploadGPUCompletionFence,
|
||||
&pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence,
|
||||
PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
|
||||
assert(pUploadGPUCompletionFence);
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
|
||||
"buffer_subdata to upload compressed bitstream.\n");
|
||||
pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, pUploadGPUCompletionFence, OS_TIMEOUT_INFINITE);
|
||||
pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pUploadGPUCompletionFence, NULL);
|
||||
pipe_resource_reference(&pPipeCompressedBufferObj, NULL);
|
||||
|
||||
// [After buffer_subdata GPU work is finished] Clear CPU staging buffer now that end_frame is called and was uploaded
|
||||
// to GPU for DecodeFrame call.
|
||||
pD3D12Dec->m_stagingDecodeBitstream.resize(0);
|
||||
assert(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence);
|
||||
// To be waited on GPU fence before flushing current frame DecodeFrame to GPU
|
||||
|
||||
///
|
||||
/// Proceed to record the GPU Decode commands
|
||||
|
|
@ -442,14 +467,14 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
// Translate input D3D12 structure
|
||||
D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {};
|
||||
|
||||
d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get();
|
||||
d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Get();
|
||||
d3d12InputArguments.CompressedBitstream.Offset = 0u;
|
||||
ASSERTED constexpr uint64_t d3d12BitstreamOffsetAlignment =
|
||||
128u; // specified in
|
||||
// https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier
|
||||
assert((d3d12InputArguments.CompressedBitstream.Offset == 0) ||
|
||||
((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0));
|
||||
d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize;
|
||||
d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferPayloadSize;
|
||||
|
||||
D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
|
||||
|
|
@ -488,7 +513,7 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
requestedConversionArguments)) {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
|
||||
"d3d12_video_decoder_prepare_for_decode_frame\n");
|
||||
debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
|
||||
pD3D12Dec->m_fenceValue);
|
||||
assert(false);
|
||||
return;
|
||||
|
|
@ -502,25 +527,25 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
1u; // Only the codec data received from the above layer with picture params
|
||||
d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
|
||||
D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
|
||||
static_cast<uint32_t>(pD3D12Dec->m_picParamsBuffer.size()),
|
||||
pD3D12Dec->m_picParamsBuffer.data(),
|
||||
static_cast<uint32_t>(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.size()),
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.data(),
|
||||
};
|
||||
|
||||
if (pD3D12Dec->m_SliceControlBuffer.size() > 0) {
|
||||
if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.size() > 0) {
|
||||
d3d12InputArguments.NumFrameArguments++;
|
||||
d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
|
||||
D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL,
|
||||
static_cast<uint32_t>(pD3D12Dec->m_SliceControlBuffer.size()),
|
||||
pD3D12Dec->m_SliceControlBuffer.data(),
|
||||
static_cast<uint32_t>(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.size()),
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer.data(),
|
||||
};
|
||||
}
|
||||
|
||||
if (pD3D12Dec->qp_matrix_frame_argument_enabled && (pD3D12Dec->m_InverseQuantMatrixBuffer.size() > 0)) {
|
||||
if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled && (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.size() > 0)) {
|
||||
d3d12InputArguments.NumFrameArguments++;
|
||||
d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
|
||||
D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
|
||||
static_cast<uint32_t>(pD3D12Dec->m_InverseQuantMatrixBuffer.size()),
|
||||
pD3D12Dec->m_InverseQuantMatrixBuffer.data(),
|
||||
static_cast<uint32_t>(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.size()),
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.data(),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -608,13 +633,27 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n",
|
||||
pD3D12Dec->m_fenceValue);
|
||||
|
||||
// Save extra references of Decoder, DecoderHeap and DPB allocations in case
|
||||
// there's a reconfiguration that trigers the construction of new objects
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spDecoder = pD3D12Dec->m_spVideoDecoder;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spDecoderHeap = pD3D12Dec->m_spVideoDecoderHeap;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_References = pD3D12Dec->m_spDPBManager;
|
||||
|
||||
///
|
||||
/// Flush work to the GPU and blocking wait until decode finishes
|
||||
/// Flush work to the GPU
|
||||
///
|
||||
pD3D12Dec->m_needsGPUFlush = true;
|
||||
d3d12_video_decoder_flush(codec);
|
||||
// Call to d3d12_video_decoder_flush increases m_FenceValue
|
||||
uint64_t inflightIndexBeforeFlush = (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH;
|
||||
|
||||
if (!pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
|
||||
if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation())
|
||||
{
|
||||
// No need to copy, the output surface fence is merely the decode queue fence
|
||||
*picture->fence = (pipe_fence_handle*) &pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData;
|
||||
}
|
||||
else
|
||||
{
|
||||
///
|
||||
/// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
|
||||
/// We cannot use the standalone video buffer allocation directly and we must use instead
|
||||
|
|
@ -631,8 +670,13 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D);
|
||||
assert(pPipeSrc);
|
||||
|
||||
// Copy all format subresources/texture planes
|
||||
// GPU wait on the graphics context which will do the copy until the decode finishes
|
||||
pD3D12Screen->cmdqueue->Wait(
|
||||
pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.cmdqueue_fence,
|
||||
pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.value
|
||||
);
|
||||
|
||||
// Copy all format subresources/texture planes
|
||||
for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
|
||||
assert(d3d12OutputArguments.OutputSubresource < INT16_MAX);
|
||||
struct pipe_box box = { 0,
|
||||
|
|
@ -653,22 +697,12 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
|
|||
0, // src level
|
||||
&box);
|
||||
}
|
||||
// Flush resource_copy_region batch and wait on this CPU thread for GPU work completion
|
||||
struct pipe_fence_handle *completion_fence = NULL;
|
||||
// Flush resource_copy_region batch
|
||||
// The output surface fence is the graphics queue that will signal after the copy ends
|
||||
pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
|
||||
&completion_fence,
|
||||
picture->fence,
|
||||
PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
|
||||
assert(completion_fence);
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
|
||||
"resource_copy_region on decoded frame.\n");
|
||||
pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, completion_fence, OS_TIMEOUT_INFINITE);
|
||||
pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &completion_fence, NULL);
|
||||
pipe_resource_reference(&pPipeSrc, NULL);
|
||||
}
|
||||
|
||||
// We do not use the async fence for now but set it to
|
||||
// NULL to avoid uninitialized memory in VA frontend
|
||||
*picture->fence = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -678,16 +712,15 @@ int d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec *codec,
|
|||
struct pipe_fence_handle *fence,
|
||||
uint64_t timeout)
|
||||
{
|
||||
/* No need to wait for anything, we're already flushing
|
||||
and waiting in d3d12_video_decoder_end_frame */
|
||||
struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
|
||||
assert(fenceValueToWaitOn);
|
||||
|
||||
// We set NULL in d3d12_video_decoder_end_frame
|
||||
assert(fence == NULL);
|
||||
ASSERTED bool wait_res = d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn->value, timeout);
|
||||
|
||||
// Return semantics based on p_video_codec interface
|
||||
// ret == 0 -> Decode in progress
|
||||
// ret != 0 -> Decode completed
|
||||
return 1;
|
||||
return wait_res ? 1 : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -717,9 +750,6 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec)
|
|||
goto flush_fail;
|
||||
}
|
||||
|
||||
// Close and execute command list and wait for idle on CPU blocking
|
||||
// this method before resetting list and allocator for next submission.
|
||||
|
||||
if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
|
||||
pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
|
||||
pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
|
||||
|
|
@ -733,28 +763,10 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec)
|
|||
}
|
||||
|
||||
ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
|
||||
struct d3d12_fence* pUploadBitstreamFence = d3d12_fence(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence);
|
||||
pD3D12Dec->m_spDecodeCommandQueue->Wait(pUploadBitstreamFence->cmdqueue_fence, pUploadBitstreamFence->value);
|
||||
pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
|
||||
pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);
|
||||
pD3D12Dec->m_spFence->SetEventOnCompletion(pD3D12Dec->m_fenceValue, nullptr);
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - ExecuteCommandLists finished on signal with "
|
||||
"fenceValue: %d\n",
|
||||
pD3D12Dec->m_fenceValue);
|
||||
|
||||
hr = pD3D12Dec->m_spCommandAllocator->Reset();
|
||||
if (FAILED(hr)) {
|
||||
debug_printf(
|
||||
"[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12CommandAllocator failed with HR %x\n",
|
||||
hr);
|
||||
goto flush_fail;
|
||||
}
|
||||
|
||||
hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_spCommandAllocator.Get());
|
||||
if (FAILED(hr)) {
|
||||
debug_printf(
|
||||
"[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n",
|
||||
hr);
|
||||
goto flush_fail;
|
||||
}
|
||||
|
||||
// Validate device was not removed
|
||||
hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
|
||||
|
|
@ -766,9 +778,13 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec)
|
|||
goto flush_fail;
|
||||
}
|
||||
|
||||
debug_printf(
|
||||
"[d3d12_video_decoder] d3d12_video_decoder_flush - GPU signaled execution finalized for fenceValue: %d\n",
|
||||
pD3D12Dec->m_fenceValue);
|
||||
// Set async fence info
|
||||
memset(&pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData,
|
||||
0,
|
||||
sizeof(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData));
|
||||
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData.value = pD3D12Dec->m_fenceValue;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_FenceData.cmdqueue_fence = pD3D12Dec->m_spFence.Get();
|
||||
|
||||
pD3D12Dec->m_fenceValue++;
|
||||
pD3D12Dec->m_needsGPUFlush = false;
|
||||
|
|
@ -804,20 +820,31 @@ d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Scre
|
|||
return false;
|
||||
}
|
||||
|
||||
hr = pD3D12Screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
|
||||
IID_PPV_ARGS(pD3D12Dec->m_spCommandAllocator.GetAddressOf()));
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
|
||||
"CreateCommandAllocator failed with HR %x\n",
|
||||
hr);
|
||||
for (auto& inputResource : pD3D12Dec->m_inflightResourcesPool)
|
||||
{
|
||||
hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator(
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
|
||||
IID_PPV_ARGS(inputResource.m_spCommandAllocator.GetAddressOf()));
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
|
||||
"CreateCommandAllocator failed with HR %x\n",
|
||||
hr);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Device4> spD3D12Device4;
|
||||
if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
|
||||
IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) {
|
||||
debug_printf(
|
||||
"[d3d12_video_decoder] d3d12_video_decoder_create_decoder - D3D12 Device has no ID3D12Device4 support\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
hr = pD3D12Screen->dev->CreateCommandList(0,
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
|
||||
pD3D12Dec->m_spCommandAllocator.Get(),
|
||||
nullptr,
|
||||
IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
|
||||
hr = spD3D12Device4->CreateCommandList1(0,
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
|
||||
D3D12_COMMAND_LIST_FLAG_NONE,
|
||||
IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
|
||||
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
|
||||
|
|
@ -922,8 +949,8 @@ d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *p
|
|||
{
|
||||
assert(pD3D12Dec->m_spD3D12VideoDevice);
|
||||
|
||||
if (pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
|
||||
pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Reset();
|
||||
if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.Reset();
|
||||
}
|
||||
|
||||
auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask);
|
||||
|
|
@ -934,7 +961,7 @@ d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *p
|
|||
&descResource,
|
||||
D3D12_RESOURCE_STATE_COMMON,
|
||||
nullptr,
|
||||
IID_PPV_ARGS(pD3D12Dec->m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
|
||||
IID_PPV_ARGS(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - "
|
||||
"CreateCommittedResource failed with HR %x\n",
|
||||
|
|
@ -942,7 +969,7 @@ d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *p
|
|||
return false;
|
||||
}
|
||||
|
||||
pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1252,7 +1279,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
|
|||
d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile);
|
||||
ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
|
||||
D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);
|
||||
pD3D12Dec->qp_matrix_frame_argument_enabled = false;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
|
||||
switch (profileType) {
|
||||
case d3d12_video_decode_profile_type_h264:
|
||||
{
|
||||
|
|
@ -1273,7 +1300,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
|
|||
DXVA_Qmatrix_H264 dxvaQmatrixH264 = {};
|
||||
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture,
|
||||
dxvaQmatrixH264);
|
||||
pD3D12Dec->qp_matrix_frame_argument_enabled = true; // We don't have a way of knowing from the pipe params so send always
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = true; // We don't have a way of knowing from the pipe params so send always
|
||||
d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize);
|
||||
} break;
|
||||
|
||||
|
|
@ -1292,10 +1319,10 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
|
|||
|
||||
size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_HEVC);
|
||||
DXVA_Qmatrix_HEVC dxvaQmatrixHEVC = {};
|
||||
pD3D12Dec->qp_matrix_frame_argument_enabled = false;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
|
||||
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc((pipe_h265_picture_desc *) picture,
|
||||
dxvaQmatrixHEVC,
|
||||
pD3D12Dec->qp_matrix_frame_argument_enabled);
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled);
|
||||
d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixHEVC, dxvaQMatrixBufferSize);
|
||||
} break;
|
||||
case d3d12_video_decode_profile_type_av1:
|
||||
|
|
@ -1310,7 +1337,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
|
|||
d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
|
||||
&dxvaPicParamsAV1,
|
||||
dxvaPicParamsBufferSize);
|
||||
pD3D12Dec->qp_matrix_frame_argument_enabled = false;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
|
||||
} break;
|
||||
case d3d12_video_decode_profile_type_vp9:
|
||||
{
|
||||
|
|
@ -1324,7 +1351,7 @@ d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
|
|||
d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
|
||||
&dxvaPicParamsVP9,
|
||||
dxvaPicParamsBufferSize);
|
||||
pD3D12Dec->qp_matrix_frame_argument_enabled = false;
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].qp_matrix_frame_argument_enabled = false;
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
|
|
@ -1344,20 +1371,20 @@ d3d12_video_decoder_prepare_dxva_slices_control(
|
|||
switch (profileType) {
|
||||
case d3d12_video_decode_profile_type_h264:
|
||||
{
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_h264_picture_desc*) picture);
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_h264_picture_desc*) picture);
|
||||
} break;
|
||||
|
||||
case d3d12_video_decode_profile_type_hevc:
|
||||
{
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_h265_picture_desc*) picture);
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_h265_picture_desc*) picture);
|
||||
} break;
|
||||
case d3d12_video_decode_profile_type_av1:
|
||||
{
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_av1_picture_desc*) picture);
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_av1_picture_desc*) picture);
|
||||
} break;
|
||||
case d3d12_video_decode_profile_type_vp9:
|
||||
{
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec, pD3D12Dec->m_SliceControlBuffer, (struct pipe_vp9_picture_desc*) picture);
|
||||
d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec, pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_SliceControlBuffer, (struct pipe_vp9_picture_desc*) picture);
|
||||
} break;
|
||||
|
||||
default:
|
||||
|
|
@ -1372,12 +1399,12 @@ d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_deco
|
|||
void *pDXVAStruct,
|
||||
uint64_t DXVAStructSize)
|
||||
{
|
||||
if (pD3D12Dec->m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
|
||||
pD3D12Dec->m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
|
||||
if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
|
||||
}
|
||||
|
||||
pD3D12Dec->m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
|
||||
memcpy(pD3D12Dec->m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
|
||||
memcpy(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1385,12 +1412,12 @@ d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_
|
|||
void *pDXVAStruct,
|
||||
uint64_t DXVAStructSize)
|
||||
{
|
||||
if (pD3D12Dec->m_picParamsBuffer.capacity() < DXVAStructSize) {
|
||||
pD3D12Dec->m_picParamsBuffer.reserve(DXVAStructSize);
|
||||
if (pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.capacity() < DXVAStructSize) {
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.reserve(DXVAStructSize);
|
||||
}
|
||||
|
||||
pD3D12Dec->m_picParamsBuffer.resize(DXVAStructSize);
|
||||
memcpy(pD3D12Dec->m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
|
||||
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.resize(DXVAStructSize);
|
||||
memcpy(pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -1506,3 +1533,102 @@ d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType,
|
|||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns)
|
||||
{
|
||||
bool wait_result = true;
|
||||
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
|
||||
HRESULT hr = S_OK;
|
||||
uint64_t completedValue = pD3D12Dec->m_spFence->GetCompletedValue();
|
||||
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64 ") to finish with "
|
||||
"fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
|
||||
timeout_ns, fenceValueToWaitOn, completedValue);
|
||||
|
||||
if(completedValue < fenceValueToWaitOn) {
|
||||
|
||||
HANDLE event = { };
|
||||
int event_fd = 0;
|
||||
event = d3d12_fence_create_event(&event_fd);
|
||||
|
||||
hr = pD3D12Dec->m_spFence->SetEventOnCompletion(fenceValueToWaitOn, event);
|
||||
if (FAILED(hr)) {
|
||||
debug_printf(
|
||||
"[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - SetEventOnCompletion for fenceValue %" PRIu64 " failed with HR %x\n",
|
||||
fenceValueToWaitOn, hr);
|
||||
goto ensure_fence_finished_fail;
|
||||
}
|
||||
|
||||
wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
|
||||
d3d12_fence_close_event(event, event_fd);
|
||||
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting on fence to be done with "
|
||||
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn,
|
||||
completedValue);
|
||||
} else {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Fence already done with "
|
||||
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn,
|
||||
completedValue);
|
||||
}
|
||||
return wait_result;
|
||||
|
||||
ensure_fence_finished_fail:
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns)
|
||||
{
|
||||
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
|
||||
assert(pD3D12Dec);
|
||||
assert(pD3D12Dec->m_spD3D12VideoDevice);
|
||||
assert(pD3D12Dec->m_spDecodeCommandQueue);
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
ASSERTED bool wait_result = d3d12_video_decoder_ensure_fence_finished(codec, fenceValueToWaitOn, timeout_ns);
|
||||
assert(wait_result);
|
||||
|
||||
// Release references granted on end_frame for this inflight operations
|
||||
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoder.Reset();
|
||||
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoderHeap.Reset();
|
||||
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_References.reset();
|
||||
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_stagingDecodeBitstream.resize(0);
|
||||
pipe_resource_reference(&pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].pPipeCompressedBufferObj, NULL);
|
||||
|
||||
struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
|
||||
assert(pD3D12Screen);
|
||||
|
||||
pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_pBitstreamUploadGPUCompletionFence, NULL);
|
||||
|
||||
hr = pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spCommandAllocator->Reset();
|
||||
if(FAILED(hr)) {
|
||||
debug_printf("failed with %x.\n", hr);
|
||||
goto sync_with_token_fail;
|
||||
}
|
||||
|
||||
// Validate device was not removed
|
||||
hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
|
||||
if (hr != S_OK) {
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion"
|
||||
" - D3D12Device was removed AFTER d3d12_video_decoder_ensure_fence_finished "
|
||||
"execution with HR %x, but wasn't before.\n",
|
||||
hr);
|
||||
goto sync_with_token_fail;
|
||||
}
|
||||
|
||||
debug_printf(
|
||||
"[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn);
|
||||
|
||||
return wait_result;
|
||||
|
||||
sync_with_token_fail:
|
||||
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
|
@ -92,6 +92,9 @@ int d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec *codec,
|
|||
/// d3d12_video_decoder functions starts
|
||||
///
|
||||
|
||||
// We need enough to so next item in pipeline doesn't ask for a fence value we lost
|
||||
const uint64_t D3D12_VIDEO_DEC_ASYNC_DEPTH = 36;
|
||||
|
||||
struct d3d12_video_decoder_reference_poc_entry {
|
||||
uint8_t refpicset_index;
|
||||
int32_t poc_value;
|
||||
|
|
@ -117,7 +120,6 @@ struct d3d12_video_decoder
|
|||
ComPtr<ID3D12VideoDecoder> m_spVideoDecoder;
|
||||
ComPtr<ID3D12VideoDecoderHeap> m_spVideoDecoderHeap;
|
||||
ComPtr<ID3D12CommandQueue> m_spDecodeCommandQueue;
|
||||
ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;
|
||||
ComPtr<ID3D12VideoDecodeCommandList1> m_spDecodeCommandList;
|
||||
|
||||
std::vector<D3D12_RESOURCE_BARRIER> m_transitionsBeforeCloseCmdList;
|
||||
|
|
@ -138,43 +140,66 @@ struct d3d12_video_decoder
|
|||
///
|
||||
|
||||
// Tracks DPB and reference picture textures
|
||||
std::unique_ptr<d3d12_video_decoder_references_manager> m_spDPBManager;
|
||||
std::shared_ptr<d3d12_video_decoder_references_manager> m_spDPBManager;
|
||||
|
||||
static const uint64_t m_InitialCompBitstreamGPUBufferSize = (1024 /*1K*/ * 1024 /*1MB*/) * 8 /*8 MB*/; // 8MB
|
||||
|
||||
struct InFlightDecodeResources
|
||||
{
|
||||
struct pipe_fence_handle *m_pBitstreamUploadGPUCompletionFence;
|
||||
|
||||
struct d3d12_fence m_FenceData;
|
||||
|
||||
// In case of reconfigurations that trigger creation of new
|
||||
// decoder or decoderheap or reference frames allocations
|
||||
// we need to keep a reference alive to the ones that
|
||||
// are currently in-flight
|
||||
ComPtr<ID3D12VideoDecoder> m_spDecoder;
|
||||
ComPtr<ID3D12VideoDecoderHeap> m_spDecoderHeap;
|
||||
|
||||
// Tracks DPB and reference picture textures
|
||||
std::shared_ptr<d3d12_video_decoder_references_manager> m_References;
|
||||
|
||||
ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;
|
||||
// Holds the input bitstream buffer while it's being constructed in decode_bitstream calls
|
||||
std::vector<uint8_t> m_stagingDecodeBitstream;
|
||||
|
||||
// Holds the input bitstream buffer in GPU video memory
|
||||
ComPtr<ID3D12Resource> m_curFrameCompressedBitstreamBuffer;
|
||||
|
||||
// Actual number of allocated bytes available in the buffer (after
|
||||
// m_curFrameCompressedBitstreamBufferPayloadSize might be garbage)
|
||||
uint64_t m_curFrameCompressedBitstreamBufferAllocatedSize =0;
|
||||
uint64_t m_curFrameCompressedBitstreamBufferPayloadSize = 0u; // Actual number of bytes of valid data
|
||||
|
||||
// Holds a buffer for the DXVA struct layout of the picture params of the current frame
|
||||
std::vector<uint8_t> m_picParamsBuffer; // size() has the byte size of the currently held picparams ; capacity()
|
||||
// has the underlying container allocation size
|
||||
|
||||
// Set for each frame indicating whether to send VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX
|
||||
bool qp_matrix_frame_argument_enabled = false;
|
||||
|
||||
// Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX of the
|
||||
// current frame m_InverseQuantMatrixBuffer.size() == 0 means no quantization matrix buffer is set for current frame
|
||||
std::vector<uint8_t> m_InverseQuantMatrixBuffer; // size() has the byte size of the currently held
|
||||
// VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX ;
|
||||
// capacity() has the underlying container allocation size
|
||||
|
||||
// Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL of the current frame
|
||||
// m_SliceControlBuffer.size() == 0 means no quantization matrix buffer is set for current frame
|
||||
std::vector<uint8_t>
|
||||
m_SliceControlBuffer; // size() has the byte size of the currently held VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL ;
|
||||
// capacity() has the underlying container allocation size
|
||||
|
||||
pipe_resource* pPipeCompressedBufferObj = NULL;
|
||||
};
|
||||
|
||||
std::vector<InFlightDecodeResources> m_inflightResourcesPool;
|
||||
|
||||
// Holds pointers to current decode output target texture and reference textures from upper layer
|
||||
struct pipe_video_buffer *m_pCurrentDecodeTarget;
|
||||
struct pipe_video_buffer **m_pCurrentReferenceTargets;
|
||||
|
||||
// Holds the input bitstream buffer while it's being constructed in decode_bitstream calls
|
||||
std::vector<uint8_t> m_stagingDecodeBitstream;
|
||||
|
||||
const uint64_t m_InitialCompBitstreamGPUBufferSize = (1024 /*1K*/ * 1024 /*1MB*/) * 8 /*8 MB*/; // 8MB
|
||||
|
||||
// Holds the input bitstream buffer in GPU video memory
|
||||
ComPtr<ID3D12Resource> m_curFrameCompressedBitstreamBuffer;
|
||||
uint64_t m_curFrameCompressedBitstreamBufferAllocatedSize =
|
||||
m_InitialCompBitstreamGPUBufferSize; // Actual number of allocated bytes available in the buffer (after
|
||||
// m_curFrameCompressedBitstreamBufferPayloadSize might be garbage)
|
||||
uint64_t m_curFrameCompressedBitstreamBufferPayloadSize = 0u; // Actual number of bytes of valid data
|
||||
|
||||
// Holds a buffer for the DXVA struct layout of the picture params of the current frame
|
||||
std::vector<uint8_t> m_picParamsBuffer; // size() has the byte size of the currently held picparams ; capacity()
|
||||
// has the underlying container allocation size
|
||||
|
||||
// Set for each frame indicating whether to send VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX
|
||||
bool qp_matrix_frame_argument_enabled = false;
|
||||
|
||||
// Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX of the
|
||||
// current frame m_InverseQuantMatrixBuffer.size() == 0 means no quantization matrix buffer is set for current frame
|
||||
std::vector<uint8_t> m_InverseQuantMatrixBuffer; // size() has the byte size of the currently held
|
||||
// VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX ;
|
||||
// capacity() has the underlying container allocation size
|
||||
|
||||
// Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL of the current frame
|
||||
// m_SliceControlBuffer.size() == 0 means no quantization matrix buffer is set for current frame
|
||||
std::vector<uint8_t>
|
||||
m_SliceControlBuffer; // size() has the byte size of the currently held VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL ;
|
||||
// capacity() has the underlying container allocation size
|
||||
|
||||
// Indicates if GPU commands have not been flushed and are pending.
|
||||
bool m_needsGPUFlush = false;
|
||||
|
||||
|
|
@ -220,11 +245,17 @@ void
|
|||
d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(struct d3d12_video_decoder *codec,
|
||||
struct pipe_picture_desc * picture,
|
||||
struct d3d12_video_buffer * pD3D12VideoBuffer);
|
||||
|
||||
uint64_t
|
||||
d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec);
|
||||
|
||||
template <typename T>
|
||||
T *
|
||||
d3d12_video_decoder_get_current_dxva_picparams(struct d3d12_video_decoder *codec)
|
||||
{
|
||||
return reinterpret_cast<T *>(codec->m_picParamsBuffer.data());
|
||||
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
|
||||
assert(pD3D12Dec);
|
||||
return reinterpret_cast<T *>(codec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_picParamsBuffer.data());
|
||||
}
|
||||
bool
|
||||
d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
|
||||
|
|
@ -244,6 +275,12 @@ d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_deco
|
|||
void
|
||||
d3d12_video_decoder_prepare_dxva_slices_control(struct d3d12_video_decoder *pD3D12Dec, struct pipe_picture_desc *picture);
|
||||
|
||||
bool
|
||||
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
|
||||
|
||||
bool
|
||||
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
|
||||
|
||||
///
|
||||
/// d3d12_video_decoder functions ends
|
||||
///
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue