mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 13:48:06 +02:00
mediafoundation: Use WaitForMultipleObjects for AUTO slices wait in sliced encode mode
Reviewed-by: Jesse Natalie <jenatali@microsoft.com> Reviewed-by: Pohsiang (John) Hsu <pohhsu@microsoft.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40764>
This commit is contained in:
parent
9246c66c61
commit
283f07f13e
1 changed file with 83 additions and 61 deletions
|
|
@ -1573,10 +1573,11 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
|
|||
// and what pSliceFences[] after that point are unused and must not be waited on.
|
||||
//
|
||||
// When emitting the MFSamples asynchronously for each slice, we need to mark MFSample_LastSlice
|
||||
// on the last actual slice. In auto mode, we only know which one is the last actual slice after pLastSliceFence is
|
||||
// signaled and by that time, it is too late to mark MFSample_LastSlice on the last slice only (as it may have been
|
||||
// already emitted), so in PIPE_SLICE_AUTO_MODE, we gather all completed slices here and emit them together after
|
||||
// pLastSliceFence is signaled.
|
||||
// on the last actual slice. In auto mode, we use a pending-buffer approach: all slices except the
|
||||
// most recent are emitted eagerly with bIsLastSlice = FALSE. The most recently completed slice is
|
||||
// held in a pending slot. When pLastSliceFence signals (breaking the loop), the pending slice is
|
||||
// emitted with bIsLastSlice = TRUE. This avoids deferring all emissions to a second pass while
|
||||
// still correctly marking the last slice.
|
||||
//
|
||||
if( !pDX12EncodeContext->IsSliceAutoModeEnabled() )
|
||||
{
|
||||
|
|
@ -1620,20 +1621,31 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
|
|||
}
|
||||
else
|
||||
{
|
||||
std::vector<ComPtr<IMFMediaBuffer>> spMediaBuffers;
|
||||
spMediaBuffers.reserve( num_slice_buffers );
|
||||
std::vector<ComPtr<IMFSample>> spOutputSamples;
|
||||
spOutputSamples.reserve( num_slice_buffers );
|
||||
std::vector<std::vector<struct codec_unit_location_t>> codec_unit_metadatas;
|
||||
codec_unit_metadatas.reserve( num_slice_buffers );
|
||||
// Pending-buffer approach: emit slices eagerly as each pSliceFences[slice_idx] completes,
|
||||
// but always hold back the most recently completed slice in a "pending" slot.
|
||||
// When a new slice completes, emit the pending one with bIsLastSlice = FALSE and replace it.
|
||||
// When pLastSliceFence fires (breaking the loop), emit the pending slice with bIsLastSlice = TRUE.
|
||||
// This gives immediate emission of all slices except the last, which is only delayed by the
|
||||
// short gap between the last pSliceFences signal and the pLastSliceFence signal.
|
||||
ComPtr<IMFSample> pendingSample;
|
||||
ComPtr<IMFMediaBuffer> pendingBuffer;
|
||||
std::vector<struct codec_unit_location_t> pendingMetadata;
|
||||
pendingMetadata.reserve( 16 );
|
||||
uint32_t actual_slice_count = 0;
|
||||
|
||||
std::vector<struct codec_unit_location_t> cur_slice_codec_unit_metadata;
|
||||
cur_slice_codec_unit_metadata.reserve( 16 );
|
||||
// Deleter functor for the std::unique_ptr<void, HandleCloser> wrappers declared below,
// which hold the values returned by fence_get_win32_event. When the owning unique_ptr
// releases its pointer, the handle is passed to CloseHandle.
struct HandleCloser {
|
||||
// h: the raw handle value stored in the unique_ptr.
void operator()( void *h ) {
|
||||
// Skip null so CloseHandle is never called without a handle.
if(h) CloseHandle(h);
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<void, HandleCloser> lastSliceFenceEvent(
|
||||
pThis->m_pPipeContext->screen->fence_get_win32_event(
|
||||
pThis->m_pPipeContext->screen, pDX12EncodeContext->pLastSliceFence ) );
|
||||
assert(lastSliceFenceEvent);
|
||||
|
||||
for( uint32_t slice_idx = 0; slice_idx < num_slice_buffers; slice_idx++ )
|
||||
{
|
||||
cur_slice_codec_unit_metadata.clear();
|
||||
|
||||
// Wait for the current slice fence to complete, using pLastSliceFence as a short-circuit.
|
||||
// pLastSliceFence signals when ALL slices are complete, so once it's signaled, we can
|
||||
// stop polling individual slice fences and assume those are all done.
|
||||
|
|
@ -1641,43 +1653,47 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
|
|||
// actually generated than the max allocated buffers (num_slice_buffers is max number of supported slices in
|
||||
// auto slice mode). pLastSliceFence acts as a "cancel token" to exit the wait loop when all actual slices are
|
||||
// ready to process.
|
||||
uint32_t slice_fence_wait_iteration = 0u;
|
||||
constexpr uint32_t slice_fence_wait_max_iterations =
|
||||
1000u; // 1 second max wait (1ms timeout per iteration below)
|
||||
while( slice_fence_wait_iteration++ < slice_fence_wait_max_iterations &&
|
||||
!WaitForFence( pDX12EncodeContext->pSliceFences[slice_idx], 1000000ULL /* 1ms timeout */ ) &&
|
||||
!WaitForFence( pDX12EncodeContext->pLastSliceFence, 0 /* No wait, just poll */ ) )
|
||||
{
|
||||
// Keep polling with 1ms timeout until either:
|
||||
// 1. slice_fence_wait_iteration reaches max iterations (timeout), OR
|
||||
// 2. The current slice fence signals, OR
|
||||
// 3. pLastSliceFence signals (all actual slices are ready to process - short-circuit)
|
||||
}
|
||||
//
|
||||
// Use WaitForMultipleObjects to block until either fence signals
|
||||
//
|
||||
|
||||
if( slice_fence_wait_iteration >= slice_fence_wait_max_iterations )
|
||||
{
|
||||
assert( false );
|
||||
MFE_ERROR( "[dx12 hmft 0x%p] Timeout waiting for slice %u fence OR pLastSliceFence to complete",
|
||||
pThis,
|
||||
slice_idx );
|
||||
break;
|
||||
}
|
||||
else if( WaitForFence( pDX12EncodeContext->pSliceFences[slice_idx], 0 /* No wait, just check */ ) )
|
||||
std::unique_ptr<void, HandleCloser> sliceFenceEvent(
|
||||
pThis->m_pPipeContext->screen->fence_get_win32_event(
|
||||
pThis->m_pPipeContext->screen, pDX12EncodeContext->pSliceFences[slice_idx] ) );
|
||||
assert(sliceFenceEvent);
|
||||
|
||||
HANDLE fenceEvents[2] = { sliceFenceEvent.get(), lastSliceFenceEvent.get() };
|
||||
DWORD waitResult = WaitForMultipleObjects( 2, fenceEvents, FALSE /* bWaitAll */, INFINITE );
|
||||
|
||||
if( waitResult == WAIT_OBJECT_0 + 0 /* slice fence signaled */ )
|
||||
{
|
||||
//
|
||||
// The current slice_idx fence is completed - process this slice
|
||||
//
|
||||
|
||||
ComPtr<IMFSample> spOutputSample;
|
||||
MFCreateSample( &spOutputSample );
|
||||
spOutputSamples.push_back( spOutputSample );
|
||||
ComPtr<IMFMediaBuffer> spMediaBuffer;
|
||||
// Emit the previous pending slice (not the last) before replacing it
|
||||
if( pendingSample )
|
||||
{
|
||||
pThis->FinalizeAndEmitOutputSample( pDX12EncodeContext,
|
||||
pendingBuffer,
|
||||
pendingSample,
|
||||
pendingMetadata.data(),
|
||||
static_cast<unsigned>( pendingMetadata.size() ),
|
||||
dwReceivedInput,
|
||||
FALSE /*bIsLastSlice*/,
|
||||
ResolveStatsCompletionFenceValue );
|
||||
}
|
||||
|
||||
// Current slice becomes pending
|
||||
pendingSample.Reset();
|
||||
pendingBuffer.Reset();
|
||||
pendingMetadata.clear();
|
||||
MFCreateSample( &pendingSample );
|
||||
|
||||
// Reset codec unit metadata for this slice as it will be wrapped on its own IMFSample
|
||||
if( !pThis->ProcessSliceBitstreamZeroCopy( pDX12EncodeContext,
|
||||
slice_idx,
|
||||
spMediaBuffer,
|
||||
cur_slice_codec_unit_metadata ) )
|
||||
pendingBuffer,
|
||||
pendingMetadata ) )
|
||||
{
|
||||
debug_printf( "[dx12 hmft 0x%p] Failed to process slice %u bitstream\n", pThis, slice_idx );
|
||||
MFE_ERROR( "[dx12 hmft 0x%p] Failed to process slice %u bitstream", pThis, slice_idx );
|
||||
|
|
@ -1688,10 +1704,9 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
|
|||
break;
|
||||
}
|
||||
|
||||
spMediaBuffers.push_back( spMediaBuffer );
|
||||
codec_unit_metadatas.push_back( cur_slice_codec_unit_metadata );
|
||||
actual_slice_count++;
|
||||
}
|
||||
else if( WaitForFence( pDX12EncodeContext->pLastSliceFence, 0 /* No wait, just check */ ) )
|
||||
else if( waitResult == WAIT_OBJECT_0 + 1 /* last slice fence signaled */ )
|
||||
{
|
||||
//
|
||||
// If pLastSliceFence is completed but the slice_idx fence didn't, it means this pSliceFences slot
|
||||
|
|
@ -1701,32 +1716,39 @@ CDX12EncHMFT::xThreadProc( void *pCtx )
|
|||
}
|
||||
else
|
||||
{
|
||||
// Unexpected: neither fence signaled after exiting wait loop - this should not happen
|
||||
assert( false );
|
||||
MFE_ERROR( "[dx12 hmft 0x%p] Slice fence wait loop exited but neither fence signaled for slice %u",
|
||||
// Unexpected WaitForMultipleObjects result: waitResult matched neither fence event (e.g. WAIT_FAILED)
|
||||
DWORD lastError = GetLastError();
|
||||
debug_printf( "[dx12 hmft 0x%p] WaitForMultipleObjects failed for slice %" PRIu32 " (result=0x%" PRIx32 ", GetLastError=0x%" PRIx32 ")\n",
|
||||
pThis,
|
||||
slice_idx,
|
||||
static_cast<uint32_t>( waitResult ),
|
||||
static_cast<uint32_t>( lastError ) );
|
||||
MFE_ERROR( "[dx12 hmft 0x%p] WaitForMultipleObjects failed for slice %u (result=0x%x, GetLastError=0x%x)",
|
||||
pThis,
|
||||
slice_idx );
|
||||
slice_idx,
|
||||
static_cast<uint32_t>( waitResult ),
|
||||
static_cast<uint32_t>( lastError ) );
|
||||
assert( false );
|
||||
pThis->QueueEvent( MEError, GUID_NULL, E_FAIL, nullptr );
|
||||
bHasEncodingError = TRUE;
|
||||
delete pDX12EncodeContext;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// At the end of the loop, we always have in spOutputSamples.size() the actual number of slices processed
|
||||
// which may be less than num_slice_buffers in auto slice mode
|
||||
// so adjust accordingly num_slice_buffers here so we only emit the actual processed slices
|
||||
num_slice_buffers = static_cast<uint32_t>( spOutputSamples.size() );
|
||||
|
||||
for( uint32_t emission_idx = 0; emission_idx < num_slice_buffers; emission_idx++ )
|
||||
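// Emit the final pending slice with bIsLastSlice = TRUE.
// NOTE(review): the WaitForMultipleObjects failure branch above deletes pDX12EncodeContext before
// breaking out of the loop; confirm this emission cannot run with a dangling context while
// pendingSample is still set from an earlier slice.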
|
||||
if( pendingSample )
|
||||
{
|
||||
pThis->FinalizeAndEmitOutputSample( pDX12EncodeContext,
|
||||
spMediaBuffers[emission_idx],
|
||||
spOutputSamples[emission_idx],
|
||||
codec_unit_metadatas[emission_idx].data(),
|
||||
static_cast<unsigned>( codec_unit_metadatas[emission_idx].size() ),
|
||||
pendingBuffer,
|
||||
pendingSample,
|
||||
pendingMetadata.data(),
|
||||
static_cast<unsigned>( pendingMetadata.size() ),
|
||||
dwReceivedInput,
|
||||
( emission_idx == ( num_slice_buffers - 1 ) ),
|
||||
TRUE /*bIsLastSlice*/,
|
||||
ResolveStatsCompletionFenceValue );
|
||||
HMFT_ETW_EVENT_STOP( "TimeToEmitMFSampleOutput", pThis );
|
||||
}
|
||||
HMFT_ETW_EVENT_STOP( "TimeToEmitMFSampleOutput", pThis );
|
||||
}
|
||||
|
||||
// Cleanup fences
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue