mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
mediafoundation: Implement full/lower resolution two pass
Reviewed-by: Yubo Xie <yuboxie@microsoft.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35264>
This commit is contained in:
parent
d7022698ff
commit
caa74854ec
13 changed files with 301 additions and 11 deletions
|
|
@ -50,6 +50,7 @@ typedef class DX12EncodeContext
|
|||
// and then signal EnqueueResourceRelease so the media
|
||||
// producer (e.g decoder) can reuse the buffer in their pool
|
||||
pipe_video_buffer *pPipeVideoBuffer = nullptr;
|
||||
pipe_video_buffer *pDownscaledTwoPassPipeVideoBuffer = nullptr;
|
||||
ComPtr<IMFMediaBuffer> spMediaBuffer;
|
||||
ComPtr<IMFD3D12SynchronizationObjectCommands> spSyncObjectCommands;
|
||||
ID3D12CommandQueue *pSyncObjectQueue = nullptr; // weakref
|
||||
|
|
@ -188,5 +189,7 @@ typedef class DX12EncodeContext
|
|||
pVlScreen->pscreen->resource_destroy( pVlScreen->pscreen, pPipeResourceRCBitAllocMapStats );
|
||||
if( pPipeVideoBuffer )
|
||||
pPipeVideoBuffer->destroy( pPipeVideoBuffer );
|
||||
if( pDownscaledTwoPassPipeVideoBuffer )
|
||||
pDownscaledTwoPassPipeVideoBuffer->destroy( pDownscaledTwoPassPipeVideoBuffer );
|
||||
}
|
||||
} *LPDX12EncodeContext;
|
||||
|
|
|
|||
|
|
@ -183,6 +183,60 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
|
|||
debug_printf( "[dx12 hmft 0x%p] DX12 input sample\n", this );
|
||||
}
|
||||
|
||||
#if ENCODE_WITH_TWO_PASS
|
||||
if (m_pPipeVideoCodec->two_pass.enable &&
|
||||
(m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0))
|
||||
{
|
||||
// TODO: In case the app sends the downscaled input remove this
|
||||
|
||||
//
|
||||
// Use VPBlit to downscale the input texture to generate the 1st pass
|
||||
// downscaled input texture
|
||||
//
|
||||
|
||||
struct pipe_video_buffer templ = {};
|
||||
templ.buffer_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
|
||||
templ.width = static_cast<uint32_t>(std::ceil(pDX12EncodeContext->pPipeVideoBuffer->width / (1 << m_pPipeVideoCodec->two_pass.pow2_downscale_factor)));
|
||||
templ.height = static_cast<uint32_t>(std::ceil(pDX12EncodeContext->pPipeVideoBuffer->height / (1 << m_pPipeVideoCodec->two_pass.pow2_downscale_factor)));
|
||||
pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer = m_pPipeContext->create_video_buffer(m_pPipeContext, &templ);
|
||||
|
||||
struct pipe_vpp_desc vpblit_params = {};
|
||||
struct pipe_fence_handle *dst_surface_fence = nullptr;
|
||||
|
||||
vpblit_params.src_surface_fence = m_pPipeFenceHandle; // input surface fence (driver input)
|
||||
vpblit_params.base.fence = &dst_surface_fence; // Output surface fence (driver output)
|
||||
|
||||
vpblit_params.base.input_format = pDX12EncodeContext->pPipeVideoBuffer->buffer_format;
|
||||
vpblit_params.base.output_format = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer->buffer_format;
|
||||
vpblit_params.src_region.x0 = 0u;
|
||||
vpblit_params.src_region.y0 = 0u;
|
||||
vpblit_params.src_region.x1 = pDX12EncodeContext->pPipeVideoBuffer->width;
|
||||
vpblit_params.src_region.y1 = pDX12EncodeContext->pPipeVideoBuffer->height;
|
||||
|
||||
vpblit_params.dst_region.x0 = 0u;
|
||||
vpblit_params.dst_region.y0 = 0u;
|
||||
vpblit_params.dst_region.x1 = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer->width;
|
||||
vpblit_params.dst_region.y1 = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer->height;
|
||||
|
||||
m_pPipeVideoBlitter->begin_frame(m_pPipeVideoBlitter,
|
||||
pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer,
|
||||
&vpblit_params.base);
|
||||
|
||||
CHECKBOOL_GOTO( (m_pPipeVideoBlitter->process_frame(m_pPipeVideoBlitter, pDX12EncodeContext->pPipeVideoBuffer, &vpblit_params) == 0), MF_E_UNEXPECTED, done );
|
||||
CHECKBOOL_GOTO( (m_pPipeVideoBlitter->end_frame(m_pPipeVideoBlitter, pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer, &vpblit_params.base) == 0), MF_E_UNEXPECTED, done );
|
||||
m_pPipeVideoBlitter->flush(m_pPipeVideoBlitter);
|
||||
|
||||
assert(*vpblit_params.base.fence); // Driver must have returned the completion fence
|
||||
// Wait for downscaling completion before encode can proceed
|
||||
|
||||
ASSERTED bool finished = m_pPipeVideoCodec->context->screen->fence_finish(m_pPipeVideoCodec->context->screen,
|
||||
NULL, /*passing non NULL resets GRFX context*/
|
||||
*vpblit_params.base.fence,
|
||||
OS_TIMEOUT_INFINITE );
|
||||
assert(finished);
|
||||
}
|
||||
#endif // ENCODE_WITH_TWO_PASS
|
||||
|
||||
// validate texture dimensions with surface alignment here for now, will add handling for non-aligned textures later
|
||||
if( textureWidth % surfaceWidthAlignment != 0 || textureHeight % surfaceHeightAlignment != 0 )
|
||||
{
|
||||
|
|
|
|||
|
|
@ -304,6 +304,13 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
|
|||
pPicInfo->dpb_size = static_cast<uint8_t>( cur_frame_desc->dpb_snapshot.size() );
|
||||
assert( pPicInfo->dpb_size <= PIPE_H264_MAX_DPB_SIZE );
|
||||
memcpy( &pPicInfo->dpb[0], cur_frame_desc->dpb_snapshot.data(), sizeof( cur_frame_desc->dpb_snapshot[0] ) * pPicInfo->dpb_size );
|
||||
for( unsigned i = 0; i < pPicInfo->dpb_size; i++ )
|
||||
{
|
||||
if (pPicInfo->dpb[i].pic_order_cnt == cur_frame_desc->gop_info->picture_order_count)
|
||||
{
|
||||
pPicInfo->dpb_curr_pic = static_cast<uint8_t>(i);
|
||||
}
|
||||
}
|
||||
|
||||
if( ( pPicInfo->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P ) || ( pPicInfo->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B ) )
|
||||
{
|
||||
|
|
@ -498,6 +505,13 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
|
|||
static_cast<uint32_t>( std::ceil( ( static_cast<float>( 100 - m_uiQualityVsSpeed ) / 100.0f ) *
|
||||
static_cast<double>( m_EncoderCapabilities.m_uiMaxHWSupportedQualityVsSpeedLevel ) ) ) );
|
||||
|
||||
if (m_pPipeVideoCodec->two_pass.enable &&
|
||||
(m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0))
|
||||
{
|
||||
pPicInfo->twopass_frame_config.downscaled_source = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer;
|
||||
pPicInfo->twopass_frame_config.skip_1st_pass = false;
|
||||
}
|
||||
|
||||
// Slices data
|
||||
height_in_blocks = ( ( pDX12EncodeContext->pPipeVideoBuffer->height + 15 ) >> 4 );
|
||||
width_in_blocks = ( ( pDX12EncodeContext->pPipeVideoBuffer->width + 15 ) >> 4 );
|
||||
|
|
@ -1136,6 +1150,7 @@ CDX12EncHMFT::CreateGOPTracker( uint32_t textureWidth, uint32_t textureHeight )
|
|||
uint32_t MaxHWL1Ref = m_EncoderCapabilities.m_uiMaxHWSupportedL1References;
|
||||
MaxHWL0Ref = std::min( 1u, MaxHWL0Ref ); // we only support 1
|
||||
MaxHWL1Ref = 0;
|
||||
std::unique_ptr<dpb_buffer_manager> upTwoPassDPBManager;
|
||||
|
||||
SAFE_DELETE( m_pGOPTracker );
|
||||
// B Frame not supported by HW
|
||||
|
|
@ -1163,6 +1178,18 @@ CDX12EncHMFT::CreateGOPTracker( uint32_t textureWidth, uint32_t textureHeight )
|
|||
assert( MaxHWL0Ref <= m_uiMaxNumRefFrame );
|
||||
assert( MaxHWL1Ref <= m_uiMaxNumRefFrame );
|
||||
|
||||
if (m_pPipeVideoCodec->two_pass.enable &&
|
||||
(m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0))
|
||||
{
|
||||
upTwoPassDPBManager = std::make_unique<dpb_buffer_manager>(
|
||||
m_pPipeVideoCodec,
|
||||
static_cast<unsigned>(std::ceil(textureWidth / (1 << m_pPipeVideoCodec->two_pass.pow2_downscale_factor))),
|
||||
static_cast<unsigned>(std::ceil(textureHeight / (1 << m_pPipeVideoCodec->two_pass.pow2_downscale_factor))),
|
||||
ConvertProfileToFormat( m_pPipeVideoCodec->profile ),
|
||||
m_pPipeVideoCodec->max_references + 1 /*curr pic*/ +
|
||||
( m_bLowLatency ? 0 : MFT_INPUT_QUEUE_DEPTH ) /*MFT process input queue depth for delayed in flight recon pic release*/ );
|
||||
}
|
||||
|
||||
m_pGOPTracker = new reference_frames_tracker_h264( m_pPipeVideoCodec,
|
||||
textureWidth,
|
||||
textureHeight,
|
||||
|
|
@ -1175,7 +1202,8 @@ CDX12EncHMFT::CreateGOPTracker( uint32_t textureWidth, uint32_t textureHeight )
|
|||
MaxHWL1Ref,
|
||||
m_pPipeVideoCodec->max_references,
|
||||
m_uiMaxLongTermReferences,
|
||||
m_gpuFeatureFlags.m_bH264SendUnwrappedPOC );
|
||||
m_gpuFeatureFlags.m_bH264SendUnwrappedPOC,
|
||||
std::move(upTwoPassDPBManager) );
|
||||
|
||||
CHECKNULL_GOTO( m_pGOPTracker, MF_E_INVALIDMEDIATYPE, done );
|
||||
|
||||
|
|
|
|||
|
|
@ -299,6 +299,11 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
|
|||
pPicInfo->dpb[i].pic_order_cnt = cur_frame_desc->dpb_snapshot[i].pic_order_cnt;
|
||||
pPicInfo->dpb[i].is_ltr = cur_frame_desc->dpb_snapshot[i].is_ltr;
|
||||
pPicInfo->dpb[i].buffer = cur_frame_desc->dpb_snapshot[i].buffer;
|
||||
pPicInfo->dpb[i].downscaled_buffer = cur_frame_desc->dpb_snapshot[i].downscaled_buffer;
|
||||
if (pPicInfo->dpb[i].pic_order_cnt == cur_frame_desc->gop_info->picture_order_count)
|
||||
{
|
||||
pPicInfo->dpb_curr_pic = static_cast<uint8_t>(i);
|
||||
}
|
||||
}
|
||||
|
||||
pDX12EncodeContext->longTermReferenceFrameInfo = cur_frame_desc->gop_info->long_term_reference_frame_info;
|
||||
|
|
@ -387,6 +392,13 @@ CDX12EncHMFT::PrepareForEncodeHelper( LPDX12EncodeContext pDX12EncodeContext, bo
|
|||
static_cast<uint32_t>( std::ceil( ( static_cast<float>( 100 - m_uiQualityVsSpeed ) / 100.0f ) *
|
||||
static_cast<double>( m_EncoderCapabilities.m_uiMaxHWSupportedQualityVsSpeedLevel ) ) ) );
|
||||
|
||||
if (m_pPipeVideoCodec->two_pass.enable &&
|
||||
(m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0))
|
||||
{
|
||||
pPicInfo->twopass_frame_config.downscaled_source = pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer;
|
||||
pPicInfo->twopass_frame_config.skip_1st_pass = false;
|
||||
}
|
||||
|
||||
// Setup Level, not sure why this is represented twice on the codec?
|
||||
pPicInfo->seq.general_level_idc = static_cast<uint8_t>( m_pPipeVideoCodec->level );
|
||||
|
||||
|
|
@ -910,6 +922,7 @@ CDX12EncHMFT::CreateGOPTracker( uint32_t textureWidth, uint32_t textureHeight )
|
|||
uint32_t MaxHWL1Ref = m_EncoderCapabilities.m_uiMaxHWSupportedL1References;
|
||||
MaxHWL0Ref = std::min( 1u, MaxHWL0Ref ); // we only support 1
|
||||
MaxHWL1Ref = 0;
|
||||
std::unique_ptr<dpb_buffer_manager> upTwoPassDPBManager;
|
||||
|
||||
SAFE_DELETE( m_pGOPTracker );
|
||||
// B Frame not supported by HW
|
||||
|
|
@ -937,6 +950,18 @@ CDX12EncHMFT::CreateGOPTracker( uint32_t textureWidth, uint32_t textureHeight )
|
|||
assert( MaxHWL0Ref <= m_uiMaxNumRefFrame );
|
||||
assert( MaxHWL1Ref <= m_uiMaxNumRefFrame );
|
||||
|
||||
if (m_pPipeVideoCodec->two_pass.enable &&
|
||||
(m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0))
|
||||
{
|
||||
upTwoPassDPBManager = std::make_unique<dpb_buffer_manager>(
|
||||
m_pPipeVideoCodec,
|
||||
static_cast<unsigned>(std::ceil(textureWidth / (1 << m_pPipeVideoCodec->two_pass.pow2_downscale_factor))),
|
||||
static_cast<unsigned>(std::ceil(textureHeight / (1 << m_pPipeVideoCodec->two_pass.pow2_downscale_factor))),
|
||||
ConvertProfileToFormat( m_pPipeVideoCodec->profile ),
|
||||
m_pPipeVideoCodec->max_references + 1 /*curr pic*/ +
|
||||
( m_bLowLatency ? 0 : MFT_INPUT_QUEUE_DEPTH ) /*MFT process input queue depth for delayed in flight recon pic release*/ );
|
||||
}
|
||||
|
||||
m_pGOPTracker = new reference_frames_tracker_hevc( m_pPipeVideoCodec,
|
||||
textureWidth,
|
||||
textureHeight,
|
||||
|
|
@ -948,7 +973,8 @@ CDX12EncHMFT::CreateGOPTracker( uint32_t textureWidth, uint32_t textureHeight )
|
|||
MaxHWL0Ref,
|
||||
MaxHWL1Ref,
|
||||
m_pPipeVideoCodec->max_references,
|
||||
m_uiMaxLongTermReferences );
|
||||
m_uiMaxLongTermReferences,
|
||||
std::move(upTwoPassDPBManager) );
|
||||
CHECKNULL_GOTO( m_pGOPTracker, MF_E_INVALIDMEDIATYPE, done );
|
||||
|
||||
done:
|
||||
|
|
|
|||
|
|
@ -144,4 +144,8 @@ encoder_capabilities::initialize( pipe_screen *pScreen, pipe_video_profile video
|
|||
// TODO: We should get the supported slice mode from pipe, but currently, pipe doesn't support that query.
|
||||
// Currently, dx12MFT only support mode_blocks, so we initialize it like this.
|
||||
m_HWSupportedSliceModes = EnumMask<pipe_video_slice_mode> { PIPE_VIDEO_SLICE_MODE_BLOCKS };
|
||||
|
||||
|
||||
m_TwoPassSupport.value =
|
||||
pScreen->get_video_param( pScreen, videoProfile, PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_ENC_TWO_PASS );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -127,4 +127,7 @@ class encoder_capabilities
|
|||
|
||||
// Supported slice mode
|
||||
EnumMask<pipe_video_slice_mode> m_HWSupportedSliceModes {};
|
||||
|
||||
// Two pass encode
|
||||
union pipe_enc_cap_two_pass m_TwoPassSupport = {};
|
||||
};
|
||||
|
|
|
|||
|
|
@ -66,6 +66,10 @@ using namespace concurrency;
|
|||
using namespace Microsoft::WRL;
|
||||
using Microsoft::WRL::ComPtr;
|
||||
|
||||
#define ENCODE_WITH_TWO_PASS 0
|
||||
#define ENCODE_WITH_TWO_PASS_LOWEST_RES 1
|
||||
#define ENCODE_WITH_TWO_PASS_EXTERNAL_DPB_RECON_SCALE 1
|
||||
|
||||
#define NUM_INPUT_TYPES 3
|
||||
|
||||
extern MFT_REGISTER_TYPE_INFO rgOutputInfo;
|
||||
|
|
@ -473,6 +477,7 @@ class __declspec( uuid( HMFT_GUID ) ) CDX12EncHMFT : CMFD3DManager,
|
|||
UINT32 m_uiVideoOutputBitsUsedMapBlockSize = 0;
|
||||
|
||||
struct pipe_video_codec *m_pPipeVideoCodec = nullptr;
|
||||
struct pipe_video_codec *m_pPipeVideoBlitter = nullptr;
|
||||
reference_frames_tracker *m_pGOPTracker = nullptr;
|
||||
enum pipe_format m_inputPipeFormat = PIPE_FORMAT_NV12;
|
||||
|
||||
|
|
|
|||
|
|
@ -859,6 +859,34 @@ CDX12EncHMFT::InitializeEncoder( pipe_video_profile videoProfile, UINT32 Width,
|
|||
CHECKHR_GOTO( E_INVALIDARG, done );
|
||||
}
|
||||
|
||||
#if ENCODE_WITH_TWO_PASS
|
||||
encoderSettings.two_pass.enable = 1;
|
||||
#if ENCODE_WITH_TWO_PASS_LOWEST_RES
|
||||
encoderSettings.two_pass.pow2_downscale_factor = m_EncoderCapabilities.m_TwoPassSupport.bits.max_pow2_downscale_factor;
|
||||
#else
|
||||
encoderSettings.two_pass.pow2_downscale_factor = m_EncoderCapabilities.m_TwoPassSupport.bits.min_pow2_downscale_factor;
|
||||
#endif // ENCODE_WITH_TWO_PASS_LOWEST_RES
|
||||
|
||||
#if ENCODE_WITH_TWO_PASS_EXTERNAL_DPB_RECON_SCALE
|
||||
encoderSettings.two_pass.skip_1st_dpb_texture = m_EncoderCapabilities.m_TwoPassSupport.bits.supports_1pass_recon_writing_skip;
|
||||
#else
|
||||
encoderSettings.two_pass.skip_1st_dpb_texture = 0u;
|
||||
#endif // ENCODE_WITH_TWO_PASS_EXTERNAL_DPB_RECON_SCALE
|
||||
|
||||
if (encoderSettings.two_pass.enable &&
|
||||
(encoderSettings.two_pass.pow2_downscale_factor > 0))
|
||||
{
|
||||
struct pipe_video_codec blitterSettings = {};
|
||||
blitterSettings.entrypoint = PIPE_VIDEO_ENTRYPOINT_PROCESSING;
|
||||
blitterSettings.width = Width;
|
||||
blitterSettings.height = Height;
|
||||
CHECKNULL_GOTO( m_pPipeVideoBlitter = m_pPipeContext->create_video_codec( m_pPipeContext, &blitterSettings ),
|
||||
MF_E_UNEXPECTED,
|
||||
done );
|
||||
}
|
||||
|
||||
#endif // ENCODE_WITH_TWO_PASS
|
||||
|
||||
CHECKNULL_GOTO( m_pPipeVideoCodec = m_pPipeContext->create_video_codec( m_pPipeContext, &encoderSettings ),
|
||||
MF_E_UNEXPECTED,
|
||||
done );
|
||||
|
|
@ -927,6 +955,12 @@ CDX12EncHMFT::CleanupEncoder( void )
|
|||
m_pPipeVideoCodec = nullptr;
|
||||
}
|
||||
|
||||
if( m_pPipeVideoBlitter )
|
||||
{
|
||||
m_pPipeVideoBlitter->destroy( m_pPipeVideoBlitter );
|
||||
m_pPipeVideoBlitter = nullptr;
|
||||
}
|
||||
|
||||
SAFE_DELETE( m_pGOPTracker );
|
||||
}
|
||||
|
||||
|
|
@ -1130,7 +1164,86 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
|
|||
pDX12EncodeContext->pAsyncCookie,
|
||||
&encoded_bitstream_bytes,
|
||||
&metadata );
|
||||
|
||||
#if (VIDEO_CODEC_H264ENC || VIDEO_CODEC_H265ENC)
|
||||
if (pThis->m_pPipeVideoCodec->two_pass.enable &&
|
||||
(pThis->m_pPipeVideoCodec->two_pass.pow2_downscale_factor > 0) &&
|
||||
(pThis->m_pPipeVideoCodec->two_pass.skip_1st_dpb_texture))
|
||||
{
|
||||
// In this case, when two pass is enabled for a lower resolution 1st pass
|
||||
// AND we select skip_1st_dpb_texture, that means that
|
||||
// the driver will _NOT_ write the 1st pass recon pic output to
|
||||
// the downscaled_buffer object we send in the dpb_snapshot,
|
||||
// and instead we need to do a VPBlit scale from the dpb.buffer
|
||||
// into dpb.downscaled_buffer ourselves
|
||||
|
||||
struct pipe_vpp_desc vpblit_params = {};
|
||||
struct pipe_fence_handle *dst_surface_fence = nullptr;
|
||||
|
||||
vpblit_params.src_surface_fence = NULL; // No need, we _just_ waited for completion above before get_feedback
|
||||
vpblit_params.base.fence = &dst_surface_fence; // Output surface fence (driver output)
|
||||
|
||||
#if VIDEO_CODEC_H264ENC
|
||||
auto &cur_pic_dpb_entry = pDX12EncodeContext->encoderPicInfo.h264enc.dpb[pDX12EncodeContext->encoderPicInfo.h264enc.dpb_curr_pic];
|
||||
#elif VIDEO_CODEC_H265ENC
|
||||
auto &cur_pic_dpb_entry = pDX12EncodeContext->encoderPicInfo.h265enc.dpb[pDX12EncodeContext->encoderPicInfo.h265enc.dpb_curr_pic];
|
||||
#endif
|
||||
|
||||
vpblit_params.base.input_format = cur_pic_dpb_entry.buffer->buffer_format;
|
||||
vpblit_params.base.output_format = cur_pic_dpb_entry.downscaled_buffer->buffer_format;
|
||||
vpblit_params.src_region.x0 = 0u;
|
||||
vpblit_params.src_region.y0 = 0u;
|
||||
vpblit_params.src_region.x1 = cur_pic_dpb_entry.buffer->width;
|
||||
vpblit_params.src_region.y1 = cur_pic_dpb_entry.buffer->height;
|
||||
|
||||
vpblit_params.dst_region.x0 = 0u;
|
||||
vpblit_params.dst_region.y0 = 0u;
|
||||
vpblit_params.dst_region.x1 = cur_pic_dpb_entry.downscaled_buffer->width;
|
||||
vpblit_params.dst_region.y1 = cur_pic_dpb_entry.downscaled_buffer->height;
|
||||
|
||||
pThis->m_pPipeVideoBlitter->begin_frame(pThis->m_pPipeVideoBlitter,
|
||||
pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer,
|
||||
&vpblit_params.base);
|
||||
|
||||
if (pThis->m_pPipeVideoBlitter->process_frame(pThis->m_pPipeVideoBlitter, cur_pic_dpb_entry.buffer, &vpblit_params) != 0)
|
||||
{
|
||||
assert( false );
|
||||
pThis->QueueEvent( MEError, GUID_NULL, E_FAIL, nullptr );
|
||||
bHasEncodingError = TRUE;
|
||||
delete pDX12EncodeContext;
|
||||
break; // break out of while try_pop
|
||||
}
|
||||
|
||||
if (pThis->m_pPipeVideoBlitter->end_frame(pThis->m_pPipeVideoBlitter, pDX12EncodeContext->pDownscaledTwoPassPipeVideoBuffer, &vpblit_params.base) != 0)
|
||||
{
|
||||
assert( false );
|
||||
pThis->QueueEvent( MEError, GUID_NULL, E_FAIL, nullptr );
|
||||
bHasEncodingError = TRUE;
|
||||
delete pDX12EncodeContext;
|
||||
break; // break out of while try_pop
|
||||
}
|
||||
|
||||
pThis->m_pPipeVideoBlitter->flush(pThis->m_pPipeVideoBlitter);
|
||||
|
||||
assert(*vpblit_params.base.fence); // Driver must have returned the completion fence
|
||||
// Wait for downscaling completion before encode can proceed
|
||||
|
||||
// TODO: This can probably be done better later as plumbing
|
||||
// the two pass pipe into the MFT frontend API properties
|
||||
// Instead of waiting on the CPU here for the fence, can probably
|
||||
// queue the fence wait into the next frame's encode GPU fence wait
|
||||
|
||||
ASSERTED bool finished = pThis->m_pPipeVideoCodec->context->screen->fence_finish(pThis->m_pPipeVideoCodec->context->screen,
|
||||
NULL, /*passing non NULL resets GRFX context*/
|
||||
*vpblit_params.base.fence,
|
||||
OS_TIMEOUT_INFINITE );
|
||||
assert(finished);
|
||||
}
|
||||
#endif // (VIDEO_CODEC_H264ENC || VIDEO_CODEC_H265ENC)
|
||||
|
||||
// Only release the reconpic AFTER working on it for two pass if needed
|
||||
pThis->m_pGOPTracker->release_reconpic( pDX12EncodeContext->pAsyncDPBToken );
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ class reference_frames_tracker_dpb_async_token
|
|||
{
|
||||
public:
|
||||
std::vector<pipe_video_buffer *> dpb_buffers_to_release;
|
||||
std::vector<pipe_video_buffer *> dpb_downscaled_buffers_to_release;
|
||||
};
|
||||
|
||||
class reference_frames_tracker
|
||||
|
|
|
|||
|
|
@ -44,7 +44,8 @@ reference_frames_tracker_h264::reference_frames_tracker_h264( struct pipe_video_
|
|||
uint32_t MaxL1References,
|
||||
uint32_t MaxDPBCapacity,
|
||||
uint32_t MaxLongTermReferences,
|
||||
bool bSendUnwrappedPOC )
|
||||
bool bSendUnwrappedPOC,
|
||||
std::unique_ptr<dpb_buffer_manager> upTwoPassDPBManager)
|
||||
: m_codec( codec ),
|
||||
m_MaxL0References( MaxL0References ),
|
||||
m_MaxL1References( MaxL1References ),
|
||||
|
|
@ -58,7 +59,8 @@ reference_frames_tracker_h264::reference_frames_tracker_h264( struct pipe_video_
|
|||
textureHeight,
|
||||
ConvertProfileToFormat( m_codec->profile ),
|
||||
m_codec->max_references + 1 /*curr pic*/ +
|
||||
( bLowLatency ? 0 : MFT_INPUT_QUEUE_DEPTH ) /*MFT process input queue depth for delayed in flight recon pic release*/ )
|
||||
( bLowLatency ? 0 : MFT_INPUT_QUEUE_DEPTH ) /*MFT process input queue depth for delayed in flight recon pic release*/ ),
|
||||
m_upTwoPassDPBManager(std::move(upTwoPassDPBManager))
|
||||
{
|
||||
assert( m_MaxL0References == 1 );
|
||||
m_bLayerCountSet = bLayerCountSet;
|
||||
|
|
@ -92,6 +94,12 @@ reference_frames_tracker_h264::release_reconpic( reference_frames_tracker_dpb_as
|
|||
for( unsigned i = 0; i < pAsyncDPBToken->dpb_buffers_to_release.size(); i++ )
|
||||
m_DPBManager.release_dpb_buffer( pAsyncDPBToken->dpb_buffers_to_release[i] );
|
||||
|
||||
if (m_upTwoPassDPBManager)
|
||||
{
|
||||
for( unsigned i = 0; i < pAsyncDPBToken->dpb_downscaled_buffers_to_release.size(); i++ )
|
||||
m_upTwoPassDPBManager->release_dpb_buffer( pAsyncDPBToken->dpb_downscaled_buffers_to_release[i] );
|
||||
}
|
||||
|
||||
delete pAsyncDPBToken;
|
||||
}
|
||||
}
|
||||
|
|
@ -110,6 +118,7 @@ reference_frames_tracker_h264::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
uint32_t dirtyRectFrameNum )
|
||||
{
|
||||
struct pipe_video_buffer *curframe_dpb_buffer = m_DPBManager.get_fresh_dpb_buffer();
|
||||
struct pipe_video_buffer *curframe_dpb_downscaled_buffer = m_upTwoPassDPBManager ? m_upTwoPassDPBManager->get_fresh_dpb_buffer() : NULL;
|
||||
|
||||
if( markLTR )
|
||||
{
|
||||
|
|
@ -135,7 +144,11 @@ reference_frames_tracker_h264::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
if( m_frame_state_descriptor.gop_info->frame_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR )
|
||||
{
|
||||
for( auto &i : m_PrevFramesInfos )
|
||||
{
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( i.buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( i.downscaled_buffer );
|
||||
}
|
||||
m_PrevFramesInfos.clear();
|
||||
m_checkValidSTR = false;
|
||||
m_ValidSTRFrameNumNoWrap = UINT64_MAX;
|
||||
|
|
@ -205,6 +218,7 @@ reference_frames_tracker_h264::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
m_PrevFramesInfos[i].temporal_id,
|
||||
m_PrevFramesInfos[i].is_ltr,
|
||||
m_PrevFramesInfos[i].buffer,
|
||||
m_PrevFramesInfos[i].downscaled_buffer,
|
||||
} );
|
||||
m_frame_state_descriptor.dirty_rect_frame_num.push_back( m_PrevFramesInfos[i].dirty_rect_frame_num );
|
||||
}
|
||||
|
|
@ -219,6 +233,7 @@ reference_frames_tracker_h264::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
m_frame_state_descriptor.gop_info->temporal_id,
|
||||
isLTR,
|
||||
curframe_dpb_buffer,
|
||||
curframe_dpb_downscaled_buffer,
|
||||
} );
|
||||
m_frame_state_descriptor.dirty_rect_frame_num.push_back( dirtyRectFrameNum );
|
||||
|
||||
|
|
@ -257,6 +272,8 @@ reference_frames_tracker_h264::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
unreachable( "Unexpected zero STR" );
|
||||
}
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( entryToRemove->buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( entryToRemove->downscaled_buffer );
|
||||
m_PrevFramesInfos.erase( entryToRemove );
|
||||
}
|
||||
}
|
||||
|
|
@ -280,6 +297,8 @@ reference_frames_tracker_h264::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
unreachable( "Unexpected LTR replacement in Bitmap but not in PrevFramesInfos" );
|
||||
}
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( entryToRemove->buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( entryToRemove->downscaled_buffer );
|
||||
m_PrevFramesInfos.erase( entryToRemove );
|
||||
}
|
||||
MarkLTRIndex( m_frame_state_descriptor.gop_info->ltr_index );
|
||||
|
|
@ -292,11 +311,14 @@ reference_frames_tracker_h264::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
m_frame_state_descriptor.gop_info->ltr_index,
|
||||
m_frame_state_descriptor.gop_info->temporal_id,
|
||||
dirtyRectFrameNum,
|
||||
curframe_dpb_buffer } );
|
||||
curframe_dpb_buffer,
|
||||
curframe_dpb_downscaled_buffer } );
|
||||
}
|
||||
else
|
||||
{
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( curframe_dpb_buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( curframe_dpb_downscaled_buffer );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include <deque>
|
||||
#include <queue>
|
||||
#include <memory>
|
||||
|
||||
#include "dpb_buffer_manager.h"
|
||||
#include "reference_frames_tracker.h"
|
||||
|
|
@ -74,6 +75,7 @@ class reference_frames_tracker_h264 : public reference_frames_tracker
|
|||
uint8_t temporal_id;
|
||||
uint32_t dirty_rect_frame_num;
|
||||
struct pipe_video_buffer *buffer;
|
||||
struct pipe_video_buffer *downscaled_buffer;
|
||||
} PrevFrameInfo;
|
||||
|
||||
// used to sort PrevFrameInfo array
|
||||
|
|
@ -115,7 +117,8 @@ class reference_frames_tracker_h264 : public reference_frames_tracker
|
|||
uint32_t MaxL1References,
|
||||
uint32_t MaxDPBCapacity,
|
||||
uint32_t MaxLongTermReferences,
|
||||
bool bSendUnwrappedPOC );
|
||||
bool bSendUnwrappedPOC,
|
||||
std::unique_ptr<dpb_buffer_manager> upTwoPassDPBManager = nullptr );
|
||||
|
||||
private:
|
||||
uint32_t PrepareFrameRefLists( bool useLTR, uint32_t useLTRBitmap );
|
||||
|
|
@ -151,6 +154,7 @@ class reference_frames_tracker_h264 : public reference_frames_tracker
|
|||
std::deque<struct PrevFrameInfo> m_PrevFramesInfos;
|
||||
struct pipe_video_codec *m_codec;
|
||||
dpb_buffer_manager m_DPBManager;
|
||||
std::unique_ptr<dpb_buffer_manager> m_upTwoPassDPBManager;
|
||||
|
||||
bool m_pendingMarkLTR = false;
|
||||
uint32_t m_pendingMarkLTRIndex = 0;
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@
|
|||
#include <iterator>
|
||||
#include "hmft_entrypoints.h"
|
||||
#include "wpptrace.h"
|
||||
|
||||
#include "reference_frames_tracker_hevc.tmh"
|
||||
|
||||
reference_frames_tracker_hevc::reference_frames_tracker_hevc( struct pipe_video_codec *codec,
|
||||
|
|
@ -41,7 +40,8 @@ reference_frames_tracker_hevc::reference_frames_tracker_hevc( struct pipe_video_
|
|||
uint32_t MaxL0References,
|
||||
uint32_t MaxL1References,
|
||||
uint32_t MaxDPBCapacity,
|
||||
uint32_t MaxLongTermReferences )
|
||||
uint32_t MaxLongTermReferences,
|
||||
std::unique_ptr<dpb_buffer_manager> upTwoPassDPBManager)
|
||||
: m_codec( codec ),
|
||||
m_MaxL0References( MaxL0References ),
|
||||
m_MaxL1References( MaxL1References ),
|
||||
|
|
@ -53,7 +53,8 @@ reference_frames_tracker_hevc::reference_frames_tracker_hevc( struct pipe_video_
|
|||
textureHeight,
|
||||
ConvertProfileToFormat( m_codec->profile ),
|
||||
m_codec->max_references + 1 /*curr pic*/ +
|
||||
( bLowLatency ? 0 : MFT_INPUT_QUEUE_DEPTH ) /*MFT process input queue depth for delayed in flight recon pic release*/ )
|
||||
( bLowLatency ? 0 : MFT_INPUT_QUEUE_DEPTH ) /*MFT process input queue depth for delayed in flight recon pic release*/ ),
|
||||
m_upTwoPassDPBManager(std::move(upTwoPassDPBManager))
|
||||
{
|
||||
assert( m_MaxL0References == 1 );
|
||||
|
||||
|
|
@ -74,6 +75,12 @@ reference_frames_tracker_hevc::release_reconpic( reference_frames_tracker_dpb_as
|
|||
for( unsigned i = 0; i < pAsyncDPBToken->dpb_buffers_to_release.size(); i++ )
|
||||
m_DPBManager.release_dpb_buffer( pAsyncDPBToken->dpb_buffers_to_release[i] );
|
||||
|
||||
if (m_upTwoPassDPBManager)
|
||||
{
|
||||
for( unsigned i = 0; i < pAsyncDPBToken->dpb_downscaled_buffers_to_release.size(); i++ )
|
||||
m_upTwoPassDPBManager->release_dpb_buffer( pAsyncDPBToken->dpb_downscaled_buffers_to_release[i] );
|
||||
}
|
||||
|
||||
delete pAsyncDPBToken;
|
||||
}
|
||||
}
|
||||
|
|
@ -92,6 +99,7 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
uint32_t dirtyRectFrameNum )
|
||||
{
|
||||
struct pipe_video_buffer *curframe_dpb_buffer = m_DPBManager.get_fresh_dpb_buffer();
|
||||
struct pipe_video_buffer *curframe_dpb_downscaled_buffer = m_upTwoPassDPBManager ? m_upTwoPassDPBManager->get_fresh_dpb_buffer() : NULL;
|
||||
|
||||
if( markLTR )
|
||||
{
|
||||
|
|
@ -113,7 +121,11 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
if( m_frame_state_descriptor.gop_info->frame_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR )
|
||||
{
|
||||
for( auto &i : m_PrevFramesInfos )
|
||||
{
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( i.buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( i.downscaled_buffer );
|
||||
}
|
||||
m_PrevFramesInfos.clear();
|
||||
m_ActiveLTRBitmap = 0;
|
||||
}
|
||||
|
|
@ -146,6 +158,8 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
if( !is_ltr || ( is_ltr && !( useLTRBitmap & ( 1 << ltr_index ) ) ) )
|
||||
{
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( itr->buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( itr->downscaled_buffer );
|
||||
itr = m_PrevFramesInfos.erase( itr );
|
||||
}
|
||||
else
|
||||
|
|
@ -178,6 +192,7 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
m_PrevFramesInfos[i].temporal_id,
|
||||
m_PrevFramesInfos[i].is_ltr,
|
||||
m_PrevFramesInfos[i].buffer,
|
||||
m_PrevFramesInfos[i].downscaled_buffer,
|
||||
} );
|
||||
m_frame_state_descriptor.dirty_rect_frame_num.push_back( m_PrevFramesInfos[i].dirty_rect_frame_num );
|
||||
}
|
||||
|
|
@ -192,6 +207,7 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
m_frame_state_descriptor.gop_info->temporal_id,
|
||||
isLTR,
|
||||
curframe_dpb_buffer,
|
||||
curframe_dpb_downscaled_buffer
|
||||
} );
|
||||
m_frame_state_descriptor.dirty_rect_frame_num.push_back( dirtyRectFrameNum );
|
||||
|
||||
|
|
@ -208,6 +224,8 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
unreachable( "Unexpected zero STR" );
|
||||
}
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( entryToRemove->buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( entryToRemove->downscaled_buffer );
|
||||
m_PrevFramesInfos.erase( entryToRemove );
|
||||
}
|
||||
|
||||
|
|
@ -230,6 +248,8 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
unreachable( "Unexpected LTR replacement in Bitmap but not in PrevFramesInfos" );
|
||||
}
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( entryToRemove->buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( entryToRemove->downscaled_buffer );
|
||||
m_PrevFramesInfos.erase( entryToRemove );
|
||||
}
|
||||
MarkLTRIndex( m_frame_state_descriptor.gop_info->ltr_index );
|
||||
|
|
@ -240,11 +260,14 @@ reference_frames_tracker_hevc::begin_frame( reference_frames_tracker_dpb_async_t
|
|||
m_frame_state_descriptor.gop_info->ltr_index,
|
||||
m_frame_state_descriptor.gop_info->temporal_id,
|
||||
dirtyRectFrameNum,
|
||||
curframe_dpb_buffer } );
|
||||
curframe_dpb_buffer,
|
||||
curframe_dpb_downscaled_buffer } );
|
||||
}
|
||||
else
|
||||
{
|
||||
( pAsyncDPBToken )->dpb_buffers_to_release.push_back( curframe_dpb_buffer );
|
||||
if (m_upTwoPassDPBManager)
|
||||
( pAsyncDPBToken )->dpb_downscaled_buffers_to_release.push_back( curframe_dpb_downscaled_buffer );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include <deque>
|
||||
#include <queue>
|
||||
#include <memory>
|
||||
|
||||
#include "dpb_buffer_manager.h"
|
||||
#include "reference_frames_tracker.h"
|
||||
|
|
@ -67,6 +68,7 @@ class reference_frames_tracker_hevc : public reference_frames_tracker
|
|||
uint8_t temporal_id;
|
||||
uint32_t dirty_rect_frame_num;
|
||||
struct pipe_video_buffer *buffer;
|
||||
struct pipe_video_buffer *downscaled_buffer;
|
||||
} PrevFrameInfo;
|
||||
|
||||
// used to sort PrevFrameInfo array
|
||||
|
|
@ -109,7 +111,8 @@ class reference_frames_tracker_hevc : public reference_frames_tracker
|
|||
uint32_t MaxL0References,
|
||||
uint32_t MaxL1References,
|
||||
uint32_t MaxDPBCapacity,
|
||||
uint32_t MaxLongTermReferences );
|
||||
uint32_t MaxLongTermReferences,
|
||||
std::unique_ptr<dpb_buffer_manager> upTwoPassDPBManager = nullptr );
|
||||
|
||||
private:
|
||||
uint32_t PrepareFrameRefLists();
|
||||
|
|
@ -129,6 +132,7 @@ class reference_frames_tracker_hevc : public reference_frames_tracker
|
|||
std::deque<struct PrevFrameInfo> m_PrevFramesInfos;
|
||||
struct pipe_video_codec *m_codec;
|
||||
dpb_buffer_manager m_DPBManager;
|
||||
std::unique_ptr<dpb_buffer_manager> m_upTwoPassDPBManager;
|
||||
|
||||
bool m_pendingMarkLTR = false;
|
||||
uint32_t m_pendingMarkLTRIndex = 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue