From 71f61ae7bb8af4caefdff9532652e8eac9ddb00f Mon Sep 17 00:00:00 2001
From: Sil Vilerino
Date: Thu, 17 Jul 2025 11:05:43 -0400
Subject: [PATCH] mediafoundation: Do GPU-GPU encoder sync for two-pass input
 vpblit

Reviewed-by: Pohsiang (John) Hsu
Part-of:
---
 .../frontends/mediafoundation/context.h       |  3 ++
 .../frontends/mediafoundation/encode.cpp      | 48 ++++++++++++-------
 .../frontends/mediafoundation/encode_h264.cpp |  3 --
 .../frontends/mediafoundation/encode_hevc.cpp |  2 -
 .../mediafoundation/hmft_entrypoints.h        |  2 +-
 5 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/src/gallium/frontends/mediafoundation/context.h b/src/gallium/frontends/mediafoundation/context.h
index 5f1149f374e..0c67449f5b0 100644
--- a/src/gallium/frontends/mediafoundation/context.h
+++ b/src/gallium/frontends/mediafoundation/context.h
@@ -52,6 +52,7 @@ typedef class DX12EncodeContext
    // producer (e.g decoder) can reuse the buffer in their pool
    pipe_video_buffer *pPipeVideoBuffer = nullptr;
    pipe_video_buffer *pDownscaledTwoPassPipeVideoBuffer = nullptr;
+   pipe_fence_handle *pDownscaledTwoPassPipeVideoBufferCompletionFence = nullptr;
    ComPtr spMediaBuffer;
    ComPtr spSyncObjectCommands;
    ID3D12CommandQueue *pSyncObjectQueue = nullptr;   // weakref
@@ -194,5 +195,7 @@ typedef class DX12EncodeContext
          pDownscaledTwoPassPipeVideoBuffer->destroy( pDownscaledTwoPassPipeVideoBuffer );
       if( pPipeResourcePSNRStats )
          pVlScreen->pscreen->resource_destroy( pVlScreen->pscreen, pPipeResourcePSNRStats );
+      if (pDownscaledTwoPassPipeVideoBufferCompletionFence)
+         pVlScreen->pscreen->fence_reference( pVlScreen->pscreen, &pDownscaledTwoPassPipeVideoBufferCompletionFence, NULL );
    }
 } *LPDX12EncodeContext;
diff --git a/src/gallium/frontends/mediafoundation/encode.cpp b/src/gallium/frontends/mediafoundation/encode.cpp
index 1823a88c1c3..f97bd508c91 100644
--- a/src/gallium/frontends/mediafoundation/encode.cpp
+++ b/src/gallium/frontends/mediafoundation/encode.cpp
@@ -35,6 +35,8 @@ HRESULT
 CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12EncodeContext )
 {
    HRESULT hr = S_OK;
+   struct pipe_fence_handle *pPipeEncoderInputFenceHandle = nullptr;
+   UINT64 pipeEncoderInputFenceHandleValue = 0u;
    UINT unDiscontinuity = 0;
    LPDX12EncodeContext pDX12EncodeContext;
    UINT uiSubresourceIndex = 0;
@@ -126,7 +128,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
          CHECKHR_GOTO( spDeviceContext3.As( &spDeviceContext4 ), done );
          // This will signal the staging fence the d3d12 mesa backend is consuming
-         spDeviceContext4->Signal( m_spStagingFence11.Get(), m_NextSyncFenceValue );
+         spDeviceContext4->Signal( m_spStagingFence11.Get(), m_CurrentSyncFenceValue );
 
          debug_printf( "[dx12 hmft 0x%p] DX11 *shared* input sample\n", this );
       }
       else
@@ -165,7 +167,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
             // Since we're signaling from the D3D11 context on a shared fence, the signal
             // will happen after the d3d11 context copy is done.
             CHECKHR_GOTO( spDeviceContext3.As( &spDeviceContext4 ), done );
-            spDeviceContext4->Signal( m_spStagingFence11.Get(), m_NextSyncFenceValue );
+            spDeviceContext4->Signal( m_spStagingFence11.Get(), m_CurrentSyncFenceValue );
          }
       }
       else
@@ -188,7 +190,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
       // This will signal the staging fence the d3d12 mesa backend is consuming
       // Since we have a Wait() on spStagingQueue added by EnqueueResourceReadyWait, this will only happen after MF
       // triggered completion on the input
-      m_spStagingQueue->Signal( m_spStagingFence12.Get(), m_NextSyncFenceValue );
+      m_spStagingQueue->Signal( m_spStagingFence12.Get(), m_CurrentSyncFenceValue );
 
       winsysHandle.com_obj = spResource.Get();
       winsysHandle.type = WINSYS_HANDLE_TYPE_D3D12_RES;
@@ -202,8 +204,24 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
       debug_printf( "[dx12 hmft 0x%p] DX12 input sample\n", this );
    }
+
+   //
+   // If two pass is disabled, we just need to set the input fence and input fence value
+   // to the input texture fence/value.
+   //
+   // Otherwise, when two pass is enabled, we need to downscale the input texture
+   // for which we need to sync the readiness of the input texture against
+   // the vpblit input fence, and then sync the encoder readiness fence (e.g pPicInfo->base.in_fence)
+   // against the vpblit output fence
+   //
+
+   if( !m_pPipeVideoCodec->two_pass.enable || ( m_pPipeVideoCodec->two_pass.pow2_downscale_factor == 0 ) )
+   {
+      pPipeEncoderInputFenceHandle = m_pPipeFenceHandle;
+      pipeEncoderInputFenceHandleValue = m_CurrentSyncFenceValue;
+   }
 #if ENCODE_WITH_TWO_PASS
-   if( m_pPipeVideoCodec->two_pass.enable && ( m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0 ) )
+   else
    {
       // TODO: In case the app sends the downscaled input remove this
@@ -221,11 +239,12 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
       pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer = m_pPipeContext->create_video_buffer( m_pPipeContext, &templ );
 
       struct pipe_vpp_desc vpblit_params = {};
-      struct pipe_fence_handle *dst_surface_fence = nullptr;
       vpblit_params.base.in_fence = m_pPipeFenceHandle;   // input surface fence (driver input)
-      vpblit_params.base.in_fence_value = m_CurrentSyncFenceValue;
-      vpblit_params.base.out_fence = &dst_surface_fence;   // Output surface fence (driver output)
+      vpblit_params.base.in_fence_value = pipeEncoderInputFenceHandleValue;   // input surface fence value (driver input)
+
+      vpblit_params.base.out_fence = &pPipeEncoderInputFenceHandle;   // Output surface fence (driver output)
+      pipeEncoderInputFenceHandleValue = 0u;   // pPipeEncoderInputFenceHandle is PIPE_FD_TYPE_NATIVE_SYNC so doesn't need the value
 
       vpblit_params.base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
       vpblit_params.base.output_format = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer->buffer_format;
@@ -254,14 +273,8 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
                     done );
       m_pPipeVideoBlitter->flush( m_pPipeVideoBlitter );
-      assert(dst_surface_fence); // Driver must have returned the completion fence
-      // Wait for downscaling completion before encode can proceed
-
-      ASSERTED bool finished = m_pPipeVideoCodec->fence_wait( m_pPipeVideoCodec,
-                                                              dst_surface_fence,
-                                                              OS_TIMEOUT_INFINITE );
-      assert( finished );
-      m_pPipeVideoCodec->destroy_fence( m_pPipeVideoCodec, dst_surface_fence);
+      assert(pPipeEncoderInputFenceHandle); // Driver must have returned the completion fence
+      pDX12EncodeContext->pDownscaledTwoPassPipeVideoBufferCompletionFence = pPipeEncoderInputFenceHandle;   // For destruction of the fence later
    }
 #endif // ENCODE_WITH_TWO_PASS
@@ -443,6 +456,8 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
    pDX12EncodeContext->pVlScreen = m_pVlScreen;   // weakref
 
    // Call the helper for encoder specific work
+   pDX12EncodeContext->encoderPicInfo.base.in_fence = pPipeEncoderInputFenceHandle;
+   pDX12EncodeContext->encoderPicInfo.base.in_fence_value = pipeEncoderInputFenceHandleValue;
    CHECKHR_GOTO( PrepareForEncodeHelper( pDX12EncodeContext, bReceivedDirtyRectBlob, dirtyRectFrameNum ), done );
 
    {
@@ -519,8 +534,7 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
       }
    }
 
-   // Set the fence to be waited on m_SyncFenceValue and increment the value for the next frame
-   m_CurrentSyncFenceValue = m_NextSyncFenceValue++;
+   m_CurrentSyncFenceValue++;   // increment the fence value for the next sync fence
 
 done:
    if( SUCCEEDED( hr ) )
diff --git a/src/gallium/frontends/mediafoundation/encode_h264.cpp b/src/gallium/frontends/mediafoundation/encode_h264.cpp
index d9e9a6266ba..631a0db2dfd 100644
--- a/src/gallium/frontends/mediafoundation/encode_h264.cpp
+++ b/src/gallium/frontends/mediafoundation/encode_h264.cpp
@@ -205,9 +205,6 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
    uint32_t rate_ctrl_active_layer_index = 0;
 
    pPicInfo->requested_metadata = m_EncoderCapabilities.m_HWSupportedMetadataFlags;
-
-   pPicInfo->base.in_fence = m_pPipeFenceHandle;
-   pPicInfo->base.in_fence_value = m_CurrentSyncFenceValue;
    pPicInfo->base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
 
    UpdateH264EncPictureDesc( pPicInfo,
diff --git a/src/gallium/frontends/mediafoundation/encode_hevc.cpp b/src/gallium/frontends/mediafoundation/encode_hevc.cpp
index d20f354522c..e2a5e3d870b 100644
--- a/src/gallium/frontends/mediafoundation/encode_hevc.cpp
+++ b/src/gallium/frontends/mediafoundation/encode_hevc.cpp
@@ -248,8 +248,6 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
    pPicInfo->requested_metadata = m_EncoderCapabilities.m_HWSupportedMetadataFlags;
 
-   pPicInfo->base.in_fence = m_pPipeFenceHandle;
-   pPicInfo->base.in_fence_value = m_CurrentSyncFenceValue;
    pPicInfo->base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
    if( pDX12EncodeContext->bROI )
    {
diff --git a/src/gallium/frontends/mediafoundation/hmft_entrypoints.h b/src/gallium/frontends/mediafoundation/hmft_entrypoints.h
index 04ded442968..791634ed09b 100644
--- a/src/gallium/frontends/mediafoundation/hmft_entrypoints.h
+++ b/src/gallium/frontends/mediafoundation/hmft_entrypoints.h
@@ -582,7 +582,7 @@ class __declspec( uuid( HMFT_GUID ) ) CDX12EncHMFT : CMFD3DManager,
    ComPtr m_spStagingFence12;
    struct pipe_fence_handle *m_pPipeFenceHandle = nullptr;
    HANDLE m_hSharedFenceHandle = nullptr;
-   uint64_t m_NextSyncFenceValue = 1, m_CurrentSyncFenceValue = 0;
+   uint64_t m_CurrentSyncFenceValue = 1;
 
    // Cached encoder capabilities
    class encoder_capabilities m_EncoderCapabilities = {};
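
Reviewer aside, not part of the applied diff: the standalone C++ sketch below restates the fence-chaining rule that PrepareForEncode() now follows, since it is easy to lose in the hunk noise. Every name in it (Fence, TwoPassConfig, EncoderInputSync, run_downscale_vpblit, select_encoder_input_sync) is a made-up stand-in, not the real gallium/HMFT API; the actual patch carries the same information through pipe_vpp_desc::base.in_fence / in_fence_value / out_fence and the encoder pic info's base.in_fence.

// Hypothetical, simplified model of the sync decision in PrepareForEncode().
// "Fence" stands in for pipe_fence_handle; none of these types are the real ones.
#include <cstdint>
#include <cstdio>

struct Fence { uint64_t id; };                       // stand-in for pipe_fence_handle*
struct TwoPassConfig { bool enable; unsigned pow2_downscale_factor; };

struct EncoderInputSync {
   const Fence *fence = nullptr;                     // fence the encoder waits on (GPU side)
   uint64_t value = 0;                               // timeline value; 0 for native-sync style fences
};

// Stand-in for the downscale vpblit: it waits on the input-texture fence on the
// GPU and hands back a fence that signals when the downscaled surface is ready.
static const Fence *run_downscale_vpblit( const Fence * /*in_fence*/, uint64_t /*in_value*/ )
{
   static Fence vpblit_done{ 100 };                  // placeholder "output fence"
   return &vpblit_done;
}

// Mirrors the new logic: neither path blocks the CPU with a fence wait.
static EncoderInputSync select_encoder_input_sync( const TwoPassConfig &two_pass,
                                                   const Fence *input_texture_fence,
                                                   uint64_t input_texture_fence_value )
{
   EncoderInputSync sync;
   if( !two_pass.enable || two_pass.pow2_downscale_factor == 0 )
   {
      // One-pass: the encoder waits directly on the input texture fence/value.
      sync.fence = input_texture_fence;
      sync.value = input_texture_fence_value;
   }
   else
   {
      // Two-pass: the vpblit consumes the input texture fence on the GPU, and its
      // output fence becomes the encoder's input fence.
      sync.fence = run_downscale_vpblit( input_texture_fence, input_texture_fence_value );
      sync.value = 0;
   }
   return sync;
}

int main()
{
   Fence input_ready{ 42 };
   EncoderInputSync s = select_encoder_input_sync( { true, 1 }, &input_ready, 7 );
   std::printf( "encoder waits on fence %llu, value %llu\n",
                (unsigned long long) s.fence->id, (unsigned long long) s.value );
}

The design point shows up in the @@ -254 hunk: the old code called fence_wait() on the CPU between the downscale blit and the encode submission, while the new code forwards the vpblit output fence as the encoder input fence, so the dependency stays on the GPU timeline and the fence is only released when the encode context is destroyed.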