mediafoundation: Do GPU-GPU encoder sync for two-pass input vpblit

Reviewed-by: Pohsiang (John) Hsu <pohhsu@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36236>
This commit is contained in:
Sil Vilerino 2025-07-17 11:05:43 -04:00 committed by Marge Bot
parent 2142f03031
commit 71f61ae7bb
5 changed files with 35 additions and 23 deletions

View file

@ -52,6 +52,7 @@ typedef class DX12EncodeContext
// producer (e.g. decoder) can reuse the buffer in its pool
pipe_video_buffer *pPipeVideoBuffer = nullptr;
pipe_video_buffer *pDownscaledTwoPassPipeVideoBuffer = nullptr;
pipe_fence_handle *pDownscaledTwoPassPipeVideoBufferCompletionFence = nullptr;
ComPtr<IMFMediaBuffer> spMediaBuffer;
ComPtr<IMFD3D12SynchronizationObjectCommands> spSyncObjectCommands;
ID3D12CommandQueue *pSyncObjectQueue = nullptr; // weakref
@ -194,5 +195,7 @@ typedef class DX12EncodeContext
pDownscaledTwoPassPipeVideoBuffer->destroy( pDownscaledTwoPassPipeVideoBuffer );
if( pPipeResourcePSNRStats )
pVlScreen->pscreen->resource_destroy( pVlScreen->pscreen, pPipeResourcePSNRStats );
if (pDownscaledTwoPassPipeVideoBufferCompletionFence)
pVlScreen->pscreen->fence_reference( pVlScreen->pscreen, &pDownscaledTwoPassPipeVideoBufferCompletionFence, NULL );
}
} *LPDX12EncodeContext;

View file

@ -35,6 +35,8 @@ HRESULT
CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12EncodeContext )
{
HRESULT hr = S_OK;
struct pipe_fence_handle *pPipeEncoderInputFenceHandle = nullptr;
UINT64 pipeEncoderInputFenceHandleValue = 0u;
UINT unDiscontinuity = 0;
LPDX12EncodeContext pDX12EncodeContext;
UINT uiSubresourceIndex = 0;
@ -126,7 +128,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
CHECKHR_GOTO( spDeviceContext3.As( &spDeviceContext4 ), done );
// This will signal the staging fence the d3d12 mesa backend is consuming
spDeviceContext4->Signal( m_spStagingFence11.Get(), m_NextSyncFenceValue );
spDeviceContext4->Signal( m_spStagingFence11.Get(), m_CurrentSyncFenceValue );
debug_printf( "[dx12 hmft 0x%p] DX11 *shared* input sample\n", this );
}
else
@ -165,7 +167,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
// Since we're signaling from the D3D11 context on a shared fence, the signal
// will happen after the d3d11 context copy is done.
CHECKHR_GOTO( spDeviceContext3.As( &spDeviceContext4 ), done );
spDeviceContext4->Signal( m_spStagingFence11.Get(), m_NextSyncFenceValue );
spDeviceContext4->Signal( m_spStagingFence11.Get(), m_CurrentSyncFenceValue );
}
}
else
@ -188,7 +190,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
// This will signal the staging fence the d3d12 mesa backend is consuming.
// Since we have a Wait() on spStagingQueue added by EnqueueResourceReadyWait, this will only happen after MF
// has triggered completion on the input.
m_spStagingQueue->Signal( m_spStagingFence12.Get(), m_NextSyncFenceValue );
m_spStagingQueue->Signal( m_spStagingFence12.Get(), m_CurrentSyncFenceValue );
winsysHandle.com_obj = spResource.Get();
winsysHandle.type = WINSYS_HANDLE_TYPE_D3D12_RES;
@ -202,8 +204,24 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
debug_printf( "[dx12 hmft 0x%p] DX12 input sample\n", this );
}
//
// If two pass is disabled, we just need to set the input fence and input fence value
// to the input texture fence/value.
//
// Otherwise, when two pass is enabled, we need to downscale the input texture.
// The vpblit must wait on the input texture fence (the vpblit input fence), and
// the encoder input fence (e.g. pPicInfo->base.in_fence) must in turn be
// the vpblit output fence, so the encode waits for the downscale to complete.
//
if( !m_pPipeVideoCodec->two_pass.enable || ( m_pPipeVideoCodec->two_pass.pow2_downscale_factor == 0 ) )
{
pPipeEncoderInputFenceHandle = m_pPipeFenceHandle;
pipeEncoderInputFenceHandleValue = m_CurrentSyncFenceValue;
}
#if ENCODE_WITH_TWO_PASS
if( m_pPipeVideoCodec->two_pass.enable && ( m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0 ) )
else
{
// TODO: Remove this if the app sends the downscaled input itself
@ -221,11 +239,12 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer = m_pPipeContext->create_video_buffer( m_pPipeContext, &templ );
struct pipe_vpp_desc vpblit_params = {};
struct pipe_fence_handle *dst_surface_fence = nullptr;
vpblit_params.base.in_fence = m_pPipeFenceHandle; // input surface fence (driver input)
vpblit_params.base.in_fence_value = m_CurrentSyncFenceValue;
vpblit_params.base.out_fence = &dst_surface_fence; // Output surface fence (driver output)
vpblit_params.base.in_fence_value = pipeEncoderInputFenceHandleValue; // input surface fence value (driver input)
vpblit_params.base.out_fence = &pPipeEncoderInputFenceHandle; // Output surface fence (driver output)
pipeEncoderInputFenceHandleValue = 0u; // pPipeEncoderInputFenceHandle is PIPE_FD_TYPE_NATIVE_SYNC so doesn't need the value
vpblit_params.base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
vpblit_params.base.output_format = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer->buffer_format;
@ -254,14 +273,8 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
done );
m_pPipeVideoBlitter->flush( m_pPipeVideoBlitter );
assert(dst_surface_fence); // Driver must have returned the completion fence
// Wait for downscaling completion before encode can proceed
ASSERTED bool finished = m_pPipeVideoCodec->fence_wait( m_pPipeVideoCodec,
dst_surface_fence,
OS_TIMEOUT_INFINITE );
assert( finished );
m_pPipeVideoCodec->destroy_fence( m_pPipeVideoCodec, dst_surface_fence);
assert(pPipeEncoderInputFenceHandle); // Driver must have returned the completion fence
pDX12EncodeContext->pDownscaledTwoPassPipeVideoBufferCompletionFence = pPipeEncoderInputFenceHandle; // For destruction of the fence later
}
#endif // ENCODE_WITH_TWO_PASS
@ -443,6 +456,8 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
pDX12EncodeContext->pVlScreen = m_pVlScreen; // weakref
// Call the helper for encoder specific work
pDX12EncodeContext->encoderPicInfo.base.in_fence = pPipeEncoderInputFenceHandle;
pDX12EncodeContext->encoderPicInfo.base.in_fence_value = pipeEncoderInputFenceHandleValue;
CHECKHR_GOTO( PrepareForEncodeHelper( pDX12EncodeContext, bReceivedDirtyRectBlob, dirtyRectFrameNum ), done );
{
@ -519,8 +534,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
}
}
// Set the fence to be waited on m_SyncFenceValue and increment the value for the next frame
m_CurrentSyncFenceValue = m_NextSyncFenceValue++;
m_CurrentSyncFenceValue++; // increment the fence value for the next sync fence
done:
if( SUCCEEDED( hr ) )

View file

@ -205,9 +205,6 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
uint32_t rate_ctrl_active_layer_index = 0;
pPicInfo->requested_metadata = m_EncoderCapabilities.m_HWSupportedMetadataFlags;
pPicInfo->base.in_fence = m_pPipeFenceHandle;
pPicInfo->base.in_fence_value = m_CurrentSyncFenceValue;
pPicInfo->base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
UpdateH264EncPictureDesc( pPicInfo,

View file

@ -248,8 +248,6 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
pPicInfo->requested_metadata = m_EncoderCapabilities.m_HWSupportedMetadataFlags;
pPicInfo->base.in_fence = m_pPipeFenceHandle;
pPicInfo->base.in_fence_value = m_CurrentSyncFenceValue;
pPicInfo->base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
if( pDX12EncodeContext->bROI )
{

View file

@ -582,7 +582,7 @@ class __declspec( uuid( HMFT_GUID ) ) CDX12EncHMFT : CMFD3DManager,
ComPtr<ID3D12Fence> m_spStagingFence12;
struct pipe_fence_handle *m_pPipeFenceHandle = nullptr;
HANDLE m_hSharedFenceHandle = nullptr;
uint64_t m_NextSyncFenceValue = 1, m_CurrentSyncFenceValue = 0;
uint64_t m_CurrentSyncFenceValue = 1;
// Cached encoder capabilities
class encoder_capabilities m_EncoderCapabilities = {};