mediafoundation: Do GPU-GPU encoder sync for two-pass input vpblit

Reviewed-by: Pohsiang (John) Hsu <pohhsu@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36236>
2026-02-09 22:50:28 +01:00 · 2025-07-17 11:05:43 -04:00 · 2025-07-17 11:05:43 -04:00 · 71f61ae7bb
commit 71f61ae7bb
parent 2142f03031
5 changed files with 35 additions and 23 deletions
--- a/src/gallium/frontends/mediafoundation/context.h
+++ b/src/gallium/frontends/mediafoundation/context.h
@ -52,6 +52,7 @@ typedef class DX12EncodeContext
   // producer (e.g decoder) can reuse the buffer in their pool
   pipe_video_buffer *pPipeVideoBuffer = nullptr;
   pipe_video_buffer *pDownscaledTwoPassPipeVideoBuffer = nullptr;
+   pipe_fence_handle *pDownscaledTwoPassPipeVideoBufferCompletionFence = nullptr;
   ComPtr<IMFMediaBuffer> spMediaBuffer;
   ComPtr<IMFD3D12SynchronizationObjectCommands> spSyncObjectCommands;
   ID3D12CommandQueue *pSyncObjectQueue = nullptr;   // weakref
@ -194,5 +195,7 @@ typedef class DX12EncodeContext
         pDownscaledTwoPassPipeVideoBuffer->destroy( pDownscaledTwoPassPipeVideoBuffer );
      if( pPipeResourcePSNRStats )
         pVlScreen->pscreen->resource_destroy( pVlScreen->pscreen, pPipeResourcePSNRStats );
+      if (pDownscaledTwoPassPipeVideoBufferCompletionFence)
+         pVlScreen->pscreen->fence_reference( pVlScreen->pscreen, &pDownscaledTwoPassPipeVideoBufferCompletionFence, NULL );
   }
 } *LPDX12EncodeContext;
--- a/src/gallium/frontends/mediafoundation/encode.cpp
+++ b/src/gallium/frontends/mediafoundation/encode.cpp
@ -35,6 +35,8 @@ HRESULT
 CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12EncodeContext )
 {
   HRESULT hr = S_OK;
+   struct pipe_fence_handle *pPipeEncoderInputFenceHandle = nullptr;
+   UINT64 pipeEncoderInputFenceHandleValue = 0u;
   UINT unDiscontinuity = 0;
   LPDX12EncodeContext pDX12EncodeContext;
   UINT uiSubresourceIndex = 0;
@ -126,7 +128,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
         CHECKHR_GOTO( spDeviceContext3.As( &spDeviceContext4 ), done );

         // This will signal the staging fence the d3d12 mesa backend is consuming
-         spDeviceContext4->Signal( m_spStagingFence11.Get(), m_NextSyncFenceValue );
+         spDeviceContext4->Signal( m_spStagingFence11.Get(), m_CurrentSyncFenceValue );
         debug_printf( "[dx12 hmft 0x%p] DX11 *shared* input sample\n", this );
      }
      else
@ -165,7 +167,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
         // Since we're signaling from the D3D11 context on a shared fence, the signal
         // will happen after the d3d11 context copy is done.
         CHECKHR_GOTO( spDeviceContext3.As( &spDeviceContext4 ), done );
-         spDeviceContext4->Signal( m_spStagingFence11.Get(), m_NextSyncFenceValue );
+         spDeviceContext4->Signal( m_spStagingFence11.Get(), m_CurrentSyncFenceValue );
      }
   }
   else
@ -188,7 +190,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
      // This will signal the staging fence the d3d12 mesa backend is consuming
      // Since we have a Wait() on spStagingQueue added by EnqueueResourceReadyWait, this will only happen after MF
      // triggered completion on the input
-      m_spStagingQueue->Signal( m_spStagingFence12.Get(), m_NextSyncFenceValue );
+      m_spStagingQueue->Signal( m_spStagingFence12.Get(), m_CurrentSyncFenceValue );

      winsysHandle.com_obj = spResource.Get();
      winsysHandle.type = WINSYS_HANDLE_TYPE_D3D12_RES;
@ -202,8 +204,24 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
      debug_printf( "[dx12 hmft 0x%p] DX12 input sample\n", this );
   }

+
+   //
+   // If two pass is disabled, the encoder input fence and fence value are simply
+   // the input texture fence/value.
+   //
+   // Otherwise, when two pass is enabled, the input texture must be downscaled first:
+   // the vpblit waits on the input texture fence before reading the input, and the
+   // encoder input fence (e.g. pPicInfo->base.in_fence) is set to the vpblit output fence,
+   // so the encoder waits on the GPU for the downscale to complete.
+   //
+
+   if( !m_pPipeVideoCodec->two_pass.enable || ( m_pPipeVideoCodec->two_pass.pow2_downscale_factor == 0 ) )
+   {
+      pPipeEncoderInputFenceHandle = m_pPipeFenceHandle;
+      pipeEncoderInputFenceHandleValue = m_CurrentSyncFenceValue;
+   }
 #if ENCODE_WITH_TWO_PASS
-   if( m_pPipeVideoCodec->two_pass.enable && ( m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0 ) )
+   else
   {
      // TODO: In case the app sends the downscaled input remove this

@ -221,11 +239,12 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
      pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer = m_pPipeContext->create_video_buffer( m_pPipeContext, &templ );

      struct pipe_vpp_desc vpblit_params = {};
-      struct pipe_fence_handle *dst_surface_fence = nullptr;

      vpblit_params.base.in_fence = m_pPipeFenceHandle;   // input surface fence (driver input)
-      vpblit_params.base.in_fence_value = m_CurrentSyncFenceValue;
-      vpblit_params.base.out_fence = &dst_surface_fence;          // Output surface fence (driver output)
+      vpblit_params.base.in_fence_value = m_CurrentSyncFenceValue;   // value signaled on the input fence for this frame; pipeEncoderInputFenceHandleValue is still 0 on this path
+
+      vpblit_params.base.out_fence = &pPipeEncoderInputFenceHandle;          // Output surface fence (driver output)
+      pipeEncoderInputFenceHandleValue = 0u; // pPipeEncoderInputFenceHandle is PIPE_FD_TYPE_NATIVE_SYNC so doesn't need the value

      vpblit_params.base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
      vpblit_params.base.output_format = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer->buffer_format;
@ -254,14 +273,8 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
                      done );
      m_pPipeVideoBlitter->flush( m_pPipeVideoBlitter );

-      assert(dst_surface_fence);   // Driver must have returned the completion fence
-      // Wait for downscaling completion before encode can proceed
-
-      ASSERTED bool finished = m_pPipeVideoCodec->fence_wait( m_pPipeVideoCodec,
-                                                              dst_surface_fence,
-                                                              OS_TIMEOUT_INFINITE );
-      assert( finished );
-      m_pPipeVideoCodec->destroy_fence( m_pPipeVideoCodec, dst_surface_fence);
+      assert(pPipeEncoderInputFenceHandle);   // Driver must have returned the completion fence
+      pDX12EncodeContext->pDownscaledTwoPassPipeVideoBufferCompletionFence = pPipeEncoderInputFenceHandle; // For destruction of the fence later
   }
 #endif   // ENCODE_WITH_TWO_PASS

@ -443,6 +456,8 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
   pDX12EncodeContext->pVlScreen = m_pVlScreen;   // weakref

   // Call the helper for encoder specific work
+   pDX12EncodeContext->encoderPicInfo.base.in_fence = pPipeEncoderInputFenceHandle;
+   pDX12EncodeContext->encoderPicInfo.base.in_fence_value = pipeEncoderInputFenceHandleValue;
   CHECKHR_GOTO( PrepareForEncodeHelper( pDX12EncodeContext, bReceivedDirtyRectBlob, dirtyRectFrameNum ), done );

   {
@ -519,8 +534,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
      }
   }

-   // Set the fence to be waited on m_SyncFenceValue and increment the value for the next frame
-   m_CurrentSyncFenceValue = m_NextSyncFenceValue++;
+   m_CurrentSyncFenceValue++;   // increment the fence value for the next sync fence

 done:
   if( SUCCEEDED( hr ) )
--- a/src/gallium/frontends/mediafoundation/encode_h264.cpp
+++ b/src/gallium/frontends/mediafoundation/encode_h264.cpp
@ -205,9 +205,6 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
   uint32_t rate_ctrl_active_layer_index = 0;

   pPicInfo->requested_metadata = m_EncoderCapabilities.m_HWSupportedMetadataFlags;
-
-   pPicInfo->base.in_fence = m_pPipeFenceHandle;
-   pPicInfo->base.in_fence_value = m_CurrentSyncFenceValue;
   pPicInfo->base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;

   UpdateH264EncPictureDesc( pPicInfo,
--- a/src/gallium/frontends/mediafoundation/encode_hevc.cpp
+++ b/src/gallium/frontends/mediafoundation/encode_hevc.cpp
@ -248,8 +248,6 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo

   pPicInfo->requested_metadata = m_EncoderCapabilities.m_HWSupportedMetadataFlags;

-   pPicInfo->base.in_fence = m_pPipeFenceHandle;
-   pPicInfo->base.in_fence_value = m_CurrentSyncFenceValue;
   pPicInfo->base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
   if( pDX12EncodeContext->bROI )
   {
--- a/src/gallium/frontends/mediafoundation/hmft_entrypoints.h
+++ b/src/gallium/frontends/mediafoundation/hmft_entrypoints.h
@ -582,7 +582,7 @@ class __declspec( uuid( HMFT_GUID ) ) CDX12EncHMFT : CMFD3DManager,
   ComPtr<ID3D12Fence> m_spStagingFence12;
   struct pipe_fence_handle *m_pPipeFenceHandle = nullptr;
   HANDLE m_hSharedFenceHandle = nullptr;
-   uint64_t m_NextSyncFenceValue = 1, m_CurrentSyncFenceValue = 0;
+   uint64_t m_CurrentSyncFenceValue = 1;

   // Cached encoder capabilities
   class encoder_capabilities m_EncoderCapabilities = {};