mediafoundation: Use lower size estimations for compressed output bitstream sizes

Reviewed-by: Pohsiang (John) Hsu <pohhsu@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37565>
Author: Silvio Vilerino
Date: 2025-09-24 14:53:45 -04:00
Committed by: Marge Bot
Parent: 30703e1d7d
Commit: 0395dca6d6
3 changed files with 69 additions and 5 deletions


@@ -542,19 +542,19 @@ CDX12EncHMFT::PrepareForEncode( IMFSample *pSample, LPDX12EncodeContext *ppDX12E
    if( m_EncoderCapabilities.m_HWSupportSlicedFences.bits.multiple_buffers_required )
    {
       // Buffer byte size for sliced buffers + notifications with multiple individual buffers per slice
-      templ.width0 = ( 1024 /*1K*/ * 1024 /*1MB*/ ) * 8 /*8 MB*/;
+      templ.width0 = m_uiMaxOutputBitstreamSize;
    }
    else
    {
       // Buffer byte size for sliced buffers + notifications with a single buffer (suballocated by driver for each slice)
-      templ.width0 = ( 1024 /*1K*/ * 1024 /*1MB*/ ) * 8 /*8 MB*/;
+      templ.width0 = m_uiMaxOutputBitstreamSize;
    }
 }
 else
 #endif // (USE_D3D12_PREVIEW_HEADERS && (D3D12_PREVIEW_SDK_VERSION >= 717))
 {
    // Buffer byte size for full frame bitstream (when num_output_buffers == 1)
-   templ.width0 = ( 1024 /*1K*/ * 1024 /*1MB*/ ) * 8 /*8 MB*/;
+   templ.width0 = m_uiMaxOutputBitstreamSize;
 }
 templ.target = PIPE_BUFFER;
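For context, a minimal sketch of how a gallium output bitstream buffer of the estimated size might be created. Apart from width0 and target (visible in the hunk above), the helper name, format, and usage values are assumptions for illustration, not code from this commit:

#include "pipe/p_screen.h"
#include "pipe/p_state.h"

// Hypothetical helper: create a PIPE_BUFFER sized by the cached per-frame estimate
// instead of the previous hard-coded 8 MB.
static struct pipe_resource *
create_output_bitstream_buffer( struct pipe_screen *screen, unsigned max_bitstream_size )
{
   struct pipe_resource templ = {};
   templ.target = PIPE_BUFFER;            // raw byte buffer for the compressed bitstream
   templ.format = PIPE_FORMAT_R8_UINT;    // assumed 1-byte element format for a plain buffer
   templ.width0 = max_bitstream_size;     // was ( 1024 * 1024 ) * 8 before this change
   templ.height0 = 1;
   templ.depth0 = 1;
   templ.array_size = 1;
   templ.usage = PIPE_USAGE_DEFAULT;      // assumed placement
   return screen->resource_create( screen, &templ );
}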


@@ -749,6 +749,7 @@ class __declspec( uuid( HMFT_GUID ) ) CDX12EncHMFT : CMFD3DManager,
    BOOL m_bWorkGlobalPrioritySet = FALSE;
 #endif // ( USE_D3D12_PREVIEW_HEADERS && ( D3D12_PREVIEW_SDK_VERSION >= 717 ) )
+   UINT m_uiMaxOutputBitstreamSize = 0u;
    struct pipe_video_codec *m_pPipeVideoCodec = nullptr;
    struct pipe_video_codec *m_pPipeVideoBlitter = nullptr;
    reference_frames_tracker *m_pGOPTracker = nullptr;


@@ -809,6 +809,53 @@ done:
    return hr;
 }
+// Helper function to calculate the max output bitstream size based on width, height, and format
+// This function uses an approach based on common video formats and their typical compression ratios
+UINT CalculateMaxOutputBitstreamSize(
+   UINT uiWidth,
+   UINT uiHeight,
+   enum pipe_format format
+)
+{
+   assert((uiHeight > 16) &&
+          (uiWidth > 16) &&
+          (format != PIPE_FORMAT_NONE));
+   const UINT MIN_BUFFER_SIZE = 128 * 128 * 2;    // Minimum buffer size for very small frames: 128x128 pixels at 2 bytes/pixel
+   const UINT MAX_BUFFER_SIZE = 20 * 1024 * 1024; // Maximum buffer size of 20MB
+   const float EXPECTED_COMPRESSION_FACTOR = 2.0f; // Assume 50% of calculated size after compression of raw pixel sizes
+   UINT alignedWidth = (uiWidth + 15) & ~15;
+   UINT alignedHeight = (uiHeight + 15) & ~15;
+   UINT bufferSize = 0;
+   switch (format) {
+      case PIPE_FORMAT_NV12:
+         // NV12: Y plane (1 byte/pixel) + UV plane (1/2 byte/pixel) = 1.5 bytes/pixel
+         bufferSize = alignedWidth * alignedHeight * 3 / 2;
+         break;
+      case PIPE_FORMAT_P010:
+         // P010: Y plane (2 bytes/pixel) + UV plane (1 byte/pixel) = 3 bytes/pixel
+         bufferSize = alignedWidth * alignedHeight * 3;
+         break;
+      case PIPE_FORMAT_AYUV:
+         // AYUV: 4 bytes/pixel
+         bufferSize = alignedWidth * alignedHeight * 4;
+         break;
+      default:
+         // Fallback formula for other formats: assume 15 bits/pixel (1.875 bytes/pixel)
+         bufferSize = (((alignedHeight) * (alignedWidth) * 15) >> 3);
+         break;
+   }
+   // Apply EXPECTED_COMPRESSION_FACTOR constant (% of calculated size)
+   bufferSize = static_cast<UINT>(std::ceil(bufferSize / EXPECTED_COMPRESSION_FACTOR));
+   // Clamp buffer size between minimum and maximum limits
+   bufferSize = std::max(MIN_BUFFER_SIZE, std::min(bufferSize, MAX_BUFFER_SIZE));
+   return bufferSize;
+}
 // internal function to initialize the encoder
 HRESULT
 CDX12EncHMFT::InitializeEncoder( pipe_video_profile videoProfile, UINT32 Width, UINT32 Height )
@@ -899,6 +946,21 @@ CDX12EncHMFT::InitializeEncoder( pipe_video_profile videoProfile, UINT32 Width,
                  MF_E_UNEXPECTED,
                  done );
+   // Calculate and cache the expected output buffer max bitstream size for a single frame
+   m_uiMaxOutputBitstreamSize = CalculateMaxOutputBitstreamSize(
+      encoderSettings.width,
+      encoderSettings.height,
+      ConvertProfileToFormat(encoderSettings.profile));
+   debug_printf("[dx12 hmft 0x%p] Calculated max output bitstream size: %u bytes (%u KB, %u MB) for %ux%u pipe_format %u\n",
+                this,
+                m_uiMaxOutputBitstreamSize,
+                m_uiMaxOutputBitstreamSize / 1024,
+                m_uiMaxOutputBitstreamSize / (1024 * 1024),
+                encoderSettings.width,
+                encoderSettings.height,
+                ConvertProfileToFormat(encoderSettings.profile));
    // Create DX12 fence and share it as handle for using it with DX11/create_fence_win32
    CHECKHR_GOTO( m_spDevice->CreateFence( 0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS( &m_spStagingFence12 ) ), done );
    CHECKHR_GOTO( m_spDevice->CreateSharedHandle( m_spStagingFence12.Get(), NULL, GENERIC_ALL, NULL, &m_hSharedFenceHandle ),
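To make the new sizing concrete, here is a small standalone sketch that mirrors the NV12 branch of CalculateMaxOutputBitstreamSize above. The constants are copied from the hunk; the helper name and the 1080p example are illustrative only, not part of the commit:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Mirrors the NV12 path of the estimation heuristic: align to 16, take 1.5 bytes/pixel,
// halve it for the expected compression, then clamp to [32 KB, 20 MB].
static uint32_t EstimateNV12BitstreamSize( uint32_t width, uint32_t height )
{
   const uint32_t kMinSize = 128 * 128 * 2;     // 32 KB floor for tiny frames
   const uint32_t kMaxSize = 20u * 1024 * 1024; // 20 MB ceiling
   const float kCompressionFactor = 2.0f;       // assume ~50% of the raw size after compression
   uint32_t alignedWidth = ( width + 15 ) & ~15u;
   uint32_t alignedHeight = ( height + 15 ) & ~15u;
   uint32_t rawSize = alignedWidth * alignedHeight * 3 / 2;  // NV12 = 1.5 bytes/pixel
   uint32_t estimate = static_cast<uint32_t>( std::ceil( rawSize / kCompressionFactor ) );
   return std::max( kMinSize, std::min( estimate, kMaxSize ) );
}

int main()
{
   // 1920x1080 aligns to 1920x1088: raw size 3,133,440 bytes -> ~1,566,720 bytes (~1.5 MB),
   // well under the fixed 8 MB that was previously allocated per frame.
   std::printf( "estimated max bitstream size: %u bytes\n", EstimateNV12BitstreamSize( 1920, 1080 ) );
   return 0;
}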
@@ -1121,10 +1183,11 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
    // Wait for each slice fence and resolve offset/size as each slice is ready
    //
    ComPtr<IMFMediaBuffer> spMemoryBuffer;
-   // TODO: Estimate size of entire frame (e.g all slices) instead of assuming 8MBs here...
+   // Create a single large buffer to hold all slices, using the max output bitstream size for the whole frame.
+   // Slices may have padding between them, so more space may be needed.
+   // TODO: or, even better, allow multiple buffers (one per slice) in the MFSample or multiple MFSamples (one per slice)
+   // with tight allocations
-   MFCreateMemoryBuffer( ( 1024 /*1K*/ * 1024 /*1MB*/ ) * 8 /*8 MB*/, &spMemoryBuffer );
+   MFCreateMemoryBuffer( pThis->m_uiMaxOutputBitstreamSize, &spMemoryBuffer );
    uint64_t output_buffer_offset = 0u;
    LPBYTE lpBuffer;
    spMemoryBuffer->Lock( &lpBuffer, NULL, NULL );
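A hedged sketch of the single-buffer packing pattern this last hunk relies on. The PackSlices helper and its parameters are hypothetical (the real code resolves each slice from its fence before copying), but MFCreateMemoryBuffer, Lock, SetCurrentLength, and Unlock are the standard Media Foundation calls used above:

#include <mfapi.h>
#include <mfobjects.h>
#include <wrl/client.h>
#include <cstring>

using Microsoft::WRL::ComPtr;

// Hypothetical helper: copy already-resolved slices back to back into one IMFMediaBuffer
// that was sized with the per-frame estimate instead of a fixed 8 MB.
HRESULT PackSlices( UINT32 maxFrameSize,
                    const BYTE *const *sliceData,
                    const DWORD *sliceSizes,
                    UINT sliceCount,
                    IMFMediaBuffer **ppOutBuffer )
{
   ComPtr<IMFMediaBuffer> spMemoryBuffer;
   HRESULT hr = MFCreateMemoryBuffer( maxFrameSize, &spMemoryBuffer );
   if( FAILED( hr ) )
      return hr;

   BYTE *lpBuffer = nullptr;
   DWORD cbMaxLength = 0;
   hr = spMemoryBuffer->Lock( &lpBuffer, &cbMaxLength, NULL );
   if( FAILED( hr ) )
      return hr;

   DWORD offset = 0;
   for( UINT i = 0; i < sliceCount; i++ )
   {
      if( offset + sliceSizes[i] > cbMaxLength )
         break;   // estimate too small; the real code would have to handle this case
      std::memcpy( lpBuffer + offset, sliceData[i], sliceSizes[i] );
      offset += sliceSizes[i];
   }

   spMemoryBuffer->Unlock();
   spMemoryBuffer->SetCurrentLength( offset );
   *ppOutBuffer = spMemoryBuffer.Detach();
   return S_OK;
}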