From 1959a352ea0d9eb54f1271bce87f8ffbca98122d Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Wed, 2 Jul 2025 14:35:49 -0700 Subject: [PATCH] d3d12: Fix video fence lifetime issues pipe_fence_handle is a refcounted object, it can't be owned by a container which might have a different lifetime, it needs a dedicated heap allocation so it can outlive its container. Make sure that when we're handing out pipe_fence_handle references, that we add a ref to them before handing them out. Instead of assuming that a fence_wait call is for the exact fence that we returned from a given op, mirror what's done on graphics and opportunistically scan the batches to see what's done, and reclaim resources for them. Use d3d12_fence helpers to replace a lot of duplicated code. Part-of: --- src/gallium/drivers/d3d12/d3d12_fence.cpp | 50 +++--- src/gallium/drivers/d3d12/d3d12_fence.h | 21 ++- src/gallium/drivers/d3d12/d3d12_video_dec.cpp | 134 +++++----------- src/gallium/drivers/d3d12/d3d12_video_dec.h | 7 +- src/gallium/drivers/d3d12/d3d12_video_enc.cpp | 148 +++++++----------- src/gallium/drivers/d3d12/d3d12_video_enc.h | 7 +- .../drivers/d3d12/d3d12_video_proc.cpp | 9 +- src/gallium/drivers/d3d12/d3d12_video_proc.h | 2 +- 8 files changed, 153 insertions(+), 225 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_fence.cpp b/src/gallium/drivers/d3d12/d3d12_fence.cpp index f1e16d36899..1611114d554 100644 --- a/src/gallium/drivers/d3d12/d3d12_fence.cpp +++ b/src/gallium/drivers/d3d12/d3d12_fence.cpp @@ -40,27 +40,30 @@ destroy_fence(struct d3d12_fence *fence) FREE(fence); } -bool -d3d12_reset_fence(struct d3d12_fence *fence, ID3D12Fence *d3d12_fence_obj, uint64_t fence_value) +struct d3d12_fence * +d3d12_create_fence(struct d3d12_screen *screen, bool signal_new) { - d3d12_fence_close_event(fence->event, fence->event_fd); - fence->cmdqueue_fence = d3d12_fence_obj; - fence->value = fence_value; - fence->event = d3d12_fence_create_event(&fence->event_fd); - fence->signaled = 
false; + struct d3d12_fence *ret = + d3d12_create_fence_raw(screen->fence, screen->fence_value + (signal_new ? 1 : 0)); + if (!ret) { + debug_printf("CALLOC_STRUCT failed\n"); + return NULL; + } - if (FAILED(fence->cmdqueue_fence->SetEventOnCompletion(fence->value, fence->event))) - goto fail; - - return true; + if (signal_new) { + ++screen->fence_value; + if (FAILED(screen->cmdqueue->Signal(screen->fence, ret->value))) + goto fail; + } + return ret; fail: - d3d12_fence_close_event(fence->event, fence->event_fd); - return false; + destroy_fence(ret); + return NULL; } struct d3d12_fence * -d3d12_create_fence(struct d3d12_screen *screen, bool signal_new) +d3d12_create_fence_raw(ID3D12Fence *fence, uint64_t value) { struct d3d12_fence *ret = CALLOC_STRUCT(d3d12_fence); if (!ret) { @@ -69,13 +72,12 @@ d3d12_create_fence(struct d3d12_screen *screen, bool signal_new) } ret->type = PIPE_FD_TYPE_NATIVE_SYNC; - uint64_t value = screen->fence_value; - if (signal_new) { - value = ++screen->fence_value; - if (FAILED(screen->cmdqueue->Signal(screen->fence, value))) - goto fail; - } - if(!d3d12_reset_fence(ret, screen->fence, value)) + ret->cmdqueue_fence = fence; + ret->value = value; + ret->event = d3d12_fence_create_event(&ret->event_fd); + ret->signaled = false; + + if (FAILED(fence->SetEventOnCompletion(value, ret->event))) goto fail; pipe_reference_init(&ret->reference, 1); @@ -124,6 +126,12 @@ d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence) *ptr = fence; } +void +d3d12_video_destroy_fence(struct pipe_video_codec *codec, struct pipe_fence_handle *fence) +{ + d3d12_fence_reference((struct d3d12_fence **)&fence, nullptr); +} + static void fence_reference(struct pipe_screen *pscreen, struct pipe_fence_handle **pptr, diff --git a/src/gallium/drivers/d3d12/d3d12_fence.h b/src/gallium/drivers/d3d12/d3d12_fence.h index 9b6303e7cf6..bc5f5900cab 100644 --- a/src/gallium/drivers/d3d12/d3d12_fence.h +++ b/src/gallium/drivers/d3d12/d3d12_fence.h @@ -101,22 
+101,37 @@ d3d12_fence(struct pipe_fence_handle *pfence) return (struct d3d12_fence *)pfence; } -bool -d3d12_reset_fence(struct d3d12_fence *fence, ID3D12Fence *d3d12_fence_obj, uint64_t fence_value); - struct d3d12_fence * d3d12_create_fence(struct d3d12_screen *screen, bool signal_new); +struct d3d12_fence * +d3d12_create_fence_raw(ID3D12Fence *d3d12_fence_obj, uint64_t fence_value); + struct d3d12_fence * d3d12_open_fence(struct d3d12_screen *screen, HANDLE handle, const void *name, pipe_fd_type type); void d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence); +void +d3d12_video_destroy_fence(struct pipe_video_codec *codec, struct pipe_fence_handle *fence); + bool d3d12_fence_finish(struct d3d12_fence *fence, uint64_t timeout_ns); void d3d12_screen_fence_init(struct pipe_screen *pscreen); +#if defined(__cplusplus) +#include <memory> +struct d3d12_fence_deleter +{ + void operator()(struct d3d12_fence *f) + { + d3d12_fence_reference(&f, nullptr); + } +}; +using d3d12_unique_fence = std::unique_ptr<struct d3d12_fence, d3d12_fence_deleter>; +#endif + #endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec.cpp b/src/gallium/drivers/d3d12/d3d12_video_dec.cpp index d4c1a379597..784f3e5db5e 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_dec.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_dec.cpp @@ -65,7 +65,7 @@ d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video // Not using new doesn't call ctor and the initializations in the class declaration are lost struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder; - pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH, { 0 }); + pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH); pD3D12Dec->base = *codec; pD3D12Dec->m_screen = context->screen; @@ -81,6 +81,7 @@ d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame; pD3D12Dec->base.flush = d3d12_video_decoder_flush;
pD3D12Dec->base.fence_wait = d3d12_video_decoder_fence_wait; + pD3D12Dec->base.destroy_fence = d3d12_video_destroy_fence; pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile); pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile); @@ -157,8 +158,7 @@ d3d12_video_decoder_destroy(struct pipe_video_codec *codec) // Flush and wait for completion of any in-flight GPU work before destroying objects d3d12_video_decoder_flush(codec); if (pD3D12Dec->m_fenceValue > 1 /* Check we submitted at least one frame */) { - auto decode_queue_completion_fence = pD3D12Dec->m_inflightResourcesPool[(pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_FenceData; - d3d12_video_decoder_sync_completion(codec, decode_queue_completion_fence.cmdqueue_fence, decode_queue_completion_fence.value, OS_TIMEOUT_INFINITE); + d3d12_video_decoder_sync_completion(codec, (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH, OS_TIMEOUT_INFINITE); struct pipe_fence_handle *context_queue_completion_fence = NULL; pD3D12Dec->base.context->flush(pD3D12Dec->base.context, &context_queue_completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); pD3D12Dec->m_pD3D12Screen->base.fence_finish(&pD3D12Dec->m_pD3D12Screen->base, NULL, context_queue_completion_fence, OS_TIMEOUT_INFINITE); @@ -201,17 +201,13 @@ d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec, /// /// Wait here to make sure the next in flight resource set is empty before using it /// - uint64_t fenceValueToWaitOn = static_cast( - std::max(static_cast(0l), - static_cast(pD3D12Dec->m_fenceValue) - static_cast(D3D12_VIDEO_DEC_ASYNC_DEPTH))); - - debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource " - "sets with previous work with fenceValue: %" PRIu64 "\n", - fenceValueToWaitOn); - - ASSERTED bool wait_res = - d3d12_video_decoder_sync_completion(codec, 
pD3D12Dec->m_spFence.Get(), fenceValueToWaitOn, OS_TIMEOUT_INFINITE); - assert(wait_res); + if (pD3D12Dec->m_fenceValue >= D3D12_VIDEO_DEC_ASYNC_DEPTH) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource " + "sets with previous work\n"); + ASSERTED bool wait_res = + d3d12_video_decoder_sync_completion(codec, pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH, OS_TIMEOUT_INFINITE); + assert(wait_res); + } HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset( pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get()); @@ -679,7 +675,8 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec, if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { // No need to copy, the output surface fence is merely the decode queue fence - *picture->fence = (pipe_fence_handle *) &pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData; + if (picture->fence) + d3d12_fence_reference((struct d3d12_fence **)picture->fence, pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_fence.get()); } else { /// /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation() @@ -698,9 +695,8 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec, assert(pPipeSrc); // GPU wait on the graphics context which will do the copy until the decode finishes - pD3D12Screen->cmdqueue->Wait( - pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.cmdqueue_fence, - pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.value); + pD3D12Dec->base.context->fence_server_sync(pD3D12Dec->base.context, + (struct pipe_fence_handle *)pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_fence.get()); // Copy all format subresources/texture planes for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { @@ -740,8 +736,12 @@ 
d3d12_video_decoder_fence_wait(struct pipe_video_codec *codec, struct pipe_fence struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence; assert(fenceValueToWaitOn); - ASSERTED bool wait_res = - d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn->cmdqueue_fence, fenceValueToWaitOn->value, timeout); + bool wait_res = d3d12_fence_finish(fenceValueToWaitOn, timeout); + if (wait_res) { + // Opportunistically reset batches + for (uint32_t i = 0; i < D3D12_VIDEO_DEC_ASYNC_DEPTH; ++i) + (void)d3d12_video_decoder_sync_completion(codec, i, 0); + } // Return semantics based on p_video_codec interface // ret == 0 -> Decode in progress @@ -806,10 +806,8 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec) } // Set async fence info - ASSERTED bool success = d3d12_reset_fence(&inFlightResources.m_FenceData, pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue); - assert(success); + inFlightResources.m_fence.reset(d3d12_create_fence_raw(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue++)); - pD3D12Dec->m_fenceValue++; pD3D12Dec->m_needsGPUFlush = false; } return; @@ -843,6 +841,7 @@ d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Scre return false; } + uint64_t CompletionFenceValue = pD3D12Dec->m_fenceValue; for (auto &inputResource : pD3D12Dec->m_inflightResourcesPool) { hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, @@ -853,6 +852,9 @@ d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Scre hr); return false; } + + // Initialize fence for the in flight resource pool slot + inputResource.m_fence.reset(d3d12_create_fence_raw(pD3D12Dec->m_spFence.Get(), CompletionFenceValue++)); } ComPtr spD3D12Device4; @@ -1620,59 +1622,9 @@ d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType, #endif // D3D12_VIDEO_ANY_DECODER_ENABLED } -bool -d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec, - ID3D12Fence 
*fence, - uint64_t fenceValueToWaitOn, - uint64_t timeout_ns) -{ - bool wait_result = true; - HRESULT hr = S_OK; - uint64_t completedValue = fence->GetCompletedValue(); - - debug_printf( - "[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64 - ") to finish with " - "fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n", - timeout_ns, - fenceValueToWaitOn, - completedValue); - - if (completedValue < fenceValueToWaitOn) { - - HANDLE event = {}; - int event_fd = 0; - event = d3d12_fence_create_event(&event_fd); - - hr = fence->SetEventOnCompletion(fenceValueToWaitOn, event); - if (FAILED(hr)) { - debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - SetEventOnCompletion for " - "fenceValue %" PRIu64 " failed with HR %x\n", - fenceValueToWaitOn, - hr); - return false; - } - - wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns); - d3d12_fence_close_event(event, event_fd); - - debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting on fence to be done with " - "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n", - fenceValueToWaitOn, - completedValue); - } else { - debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Fence already done with " - "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n", - fenceValueToWaitOn, - completedValue); - } - return wait_result; -} - bool d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, - ID3D12Fence *fence, - uint64_t fenceValueToWaitOn, + uint32_t frame_index, uint64_t timeout_ns) { struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; @@ -1681,18 +1633,16 @@ d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, assert(pD3D12Dec->m_spDecodeCommandQueue); HRESULT hr = S_OK; - ASSERTED bool wait_result = d3d12_video_decoder_ensure_fence_finished(codec, fence, fenceValueToWaitOn, 
timeout_ns); - assert(wait_result); + auto &pool_entry = pD3D12Dec->m_inflightResourcesPool[frame_index]; + if (!d3d12_fence_finish(pool_entry.m_fence.get(), timeout_ns)) + return false; // Release references granted on end_frame for this inflight operations - pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoder.Reset(); - pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoderHeap.Reset(); - pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_References.reset(); - pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_stagingDecodeBitstream.resize( - 0); - pipe_resource_reference( - &pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].pPipeCompressedBufferObj, - NULL); + pool_entry.m_spDecoder.Reset(); + pool_entry.m_spDecoderHeap.Reset(); + pool_entry.m_References.reset(); + pool_entry.m_stagingDecodeBitstream.clear(); + pipe_resource_reference(&pool_entry.pPipeCompressedBufferObj, NULL); struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen; assert(pD3D12Screen); @@ -1703,8 +1653,7 @@ d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, .m_pBitstreamUploadGPUCompletionFence, NULL); - hr = - pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spCommandAllocator->Reset(); + hr = pool_entry.m_spCommandAllocator->Reset(); if (FAILED(hr)) { debug_printf("failed with %x.\n", hr); goto sync_with_token_fail; @@ -1721,15 +1670,14 @@ d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, } debug_printf( - "[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64 - "\n", - fenceValueToWaitOn); + "[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for frame index: %u\n", + frame_index); - return wait_result; + 
return true; sync_with_token_fail: - debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n", - fenceValueToWaitOn); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for frame index: %u\n", + frame_index); assert(false); return false; -} \ No newline at end of file +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec.h b/src/gallium/drivers/d3d12/d3d12_video_dec.h index f574b1ac1dd..9c3264cdf94 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_dec.h +++ b/src/gallium/drivers/d3d12/d3d12_video_dec.h @@ -148,7 +148,7 @@ struct d3d12_video_decoder { struct pipe_fence_handle *m_pBitstreamUploadGPUCompletionFence; - struct d3d12_fence m_FenceData; + d3d12_unique_fence m_fence; // In case of reconfigurations that trigger creation of new // decoder or decoderheap or reference frames allocations @@ -276,10 +276,7 @@ void d3d12_video_decoder_prepare_dxva_slices_control(struct d3d12_video_decoder *pD3D12Dec, struct pipe_picture_desc *picture); bool -d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec, ID3D12Fence* fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns); - -bool -d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, ID3D12Fence* fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns); +d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, uint32_t frame_index, uint64_t timeout_ns); /// /// d3d12_video_decoder functions ends diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc.cpp b/src/gallium/drivers/d3d12/d3d12_video_enc.cpp index 9c396cf7db7..cc087132f32 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_enc.cpp @@ -179,59 +179,9 @@ flush_fail: assert(false); } -bool -d3d12_video_encoder_ensure_fence_finished(struct pipe_video_codec *codec, ID3D12Fence *fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns) -{ - bool wait_result = true; - struct 
d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; - HRESULT hr = S_OK; - uint64_t completedValue = fence->GetCompletedValue(); - - debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64 ") to finish with " - "fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n", - timeout_ns, fenceValueToWaitOn, completedValue); - - if(completedValue < fenceValueToWaitOn) { - - HANDLE event = { }; - int event_fd = 0; - event = d3d12_fence_create_event(&event_fd); - - hr = fence->SetEventOnCompletion(fenceValueToWaitOn, event); - if (FAILED(hr)) { - debug_printf( - "[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - SetEventOnCompletion for fenceValue %" PRIu64 " failed with HR %x\n", - fenceValueToWaitOn, hr); - goto ensure_fence_finished_fail; - } - - debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Waiting on fence to be done with " - "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n", - fenceValueToWaitOn, - completedValue); - - wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns); - d3d12_fence_close_event(event, event_fd); - } else { - debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Fence already done with " - "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n", - fenceValueToWaitOn, - completedValue); - } - return wait_result; - -ensure_fence_finished_fail: - debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn); - pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; - pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; - assert(false); - return false; -} - bool 
d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, - ID3D12Fence *fence, - uint64_t fenceValueToWaitOn, + size_t pool_index, uint64_t timeout_ns) { struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; @@ -240,22 +190,23 @@ d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, assert(pD3D12Enc->m_spEncodeCommandQueue); HRESULT hr = S_OK; - bool wait_result = d3d12_video_encoder_ensure_fence_finished(codec, fence, fenceValueToWaitOn, timeout_ns); - assert(wait_result); + auto &pool_entry = pD3D12Enc->m_inflightResourcesPool[pool_index]; + if (!d3d12_fence_finish(pool_entry.m_CompletionFence.get(), timeout_ns)) + return false; debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion - resetting ID3D12CommandAllocator %p...\n", - pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spCommandAllocator.Get()); - hr = pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spCommandAllocator->Reset(); + pool_entry.m_spCommandAllocator.Get()); + hr = pool_entry.m_spCommandAllocator->Reset(); if(FAILED(hr)) { debug_printf("failed with %x.\n", hr); goto sync_with_token_fail; } // Release references granted on end_frame for this inflight operations - pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spEncoder.Reset(); - pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spEncoderHeap.Reset(); - pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_References.reset(); - pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_InputSurfaceFence = NULL; + pool_entry.m_spEncoder.Reset(); + pool_entry.m_spEncoderHeap.Reset(); + pool_entry.m_References.reset(); + pool_entry.m_InputSurfaceFence = NULL; // Validate device was not removed hr = pD3D12Enc->m_pD3D12Screen->dev->GetDeviceRemovedReason(); @@ -268,15 
+219,14 @@ d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, } debug_printf( - "[d3d12_video_encoder] d3d12_video_encoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64 "\n", - fenceValueToWaitOn); + "[d3d12_video_encoder] d3d12_video_encoder_sync_completion - GPU execution finalized for pool index: %" PRIu64 "\n", + (uint64_t)pool_index); - return wait_result; + return true; sync_with_token_fail: - debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn); - pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; - pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for pool index: %" PRIu64 "\n", (uint64_t)pool_index); + pool_entry.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; assert(false); return false; } @@ -298,9 +248,9 @@ d3d12_video_encoder_destroy(struct pipe_video_codec *codec) // Flush pending work before destroying if(pD3D12Enc->m_bPendingWorkNotFlushed){ - uint64_t curBatchFence = pD3D12Enc->m_fenceValue; + size_t pool_index = d3d12_video_encoder_pool_current_index(pD3D12Enc); d3d12_video_encoder_flush(codec); - d3d12_video_encoder_sync_completion(codec, pD3D12Enc->m_spFence.Get(), curBatchFence, OS_TIMEOUT_INFINITE); + d3d12_video_encoder_sync_completion(codec, pool_index, OS_TIMEOUT_INFINITE); } if (pD3D12Enc->m_SliceHeaderRepackBuffer) @@ -2387,6 +2337,7 @@ d3d12_video_encoder_create_command_objects(struct d3d12_video_encoder *pD3D12Enc return false; } + uint64_t CompletionFenceValue = pD3D12Enc->m_fenceValue; for (auto& inputResource : pD3D12Enc->m_inflightResourcesPool) { // Create associated command allocator for Encode, Resolve 
operations @@ -2399,6 +2350,9 @@ d3d12_video_encoder_create_command_objects(struct d3d12_video_encoder *pD3D12Enc hr); return false; } + + // Initialize fence for the in flight resource pool slot + inputResource.m_CompletionFence.reset(d3d12_create_fence_raw(pD3D12Enc->m_spFence.Get(), CompletionFenceValue++)); } ComPtr spD3D12Device4; @@ -2434,8 +2388,8 @@ d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pi // Not using new doesn't call ctor and the initializations in the class declaration are lost struct d3d12_video_encoder *pD3D12Enc = new d3d12_video_encoder; - pD3D12Enc->m_spEncodedFrameMetadata.resize(D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT, {nullptr, 0, 0}); - pD3D12Enc->m_inflightResourcesPool.resize(D3D12_VIDEO_ENC_ASYNC_DEPTH, { 0 }); + pD3D12Enc->m_spEncodedFrameMetadata.resize(D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT); + pD3D12Enc->m_inflightResourcesPool.resize(D3D12_VIDEO_ENC_ASYNC_DEPTH); pD3D12Enc->base = *codec; pD3D12Enc->m_screen = context->screen; @@ -2453,6 +2407,7 @@ d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pi pD3D12Enc->base.get_feedback = d3d12_video_encoder_get_feedback; pD3D12Enc->base.create_dpb_buffer = d3d12_video_create_dpb_buffer; pD3D12Enc->base.fence_wait = d3d12_video_encoder_fence_wait; + pD3D12Enc->base.destroy_fence = d3d12_video_destroy_fence; pD3D12Enc->base.encode_bitstream_sliced = d3d12_video_encoder_encode_bitstream_sliced; pD3D12Enc->base.get_slice_bitstream_data = d3d12_video_encoder_get_slice_bitstream_data; @@ -2750,12 +2705,12 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec, /// /// Wait here to make sure the next in flight resource set is empty before using it /// - uint64_t fenceValueToWaitOn = static_cast(std::max(static_cast(0l), static_cast(pD3D12Enc->m_fenceValue) - static_cast(D3D12_VIDEO_ENC_ASYNC_DEPTH) )); - - debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame Waiting for completion of in flight resource sets 
with previous work with fenceValue: %" PRIu64 "\n", - fenceValueToWaitOn); - - d3d12_video_encoder_ensure_fence_finished(codec, pD3D12Enc->m_spFence.Get(), fenceValueToWaitOn, OS_TIMEOUT_INFINITE); + if (pD3D12Enc->m_fenceValue >= D3D12_VIDEO_ENC_ASYNC_DEPTH) { + debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame Waiting for completion of in flight resource sets with previous work for pool index:" + "%" PRIu64 "\n", + (uint64_t)d3d12_video_encoder_pool_current_index(pD3D12Enc)); + d3d12_fence_finish(pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_CompletionFence.get(), OS_TIMEOUT_INFINITE); + } if (!d3d12_video_encoder_reconfigure_session(pD3D12Enc, target, picture)) { debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame - Failure on " @@ -2771,7 +2726,7 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec, goto fail; } - pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_InputSurfaceFence = (struct d3d12_fence*) *picture->fence; + pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_InputSurfaceFence = picture->fence ? 
d3d12_fence(*picture->fence) : nullptr; pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK; pD3D12Enc->m_spEncodedFrameMetadata[d3d12_video_encoder_metadata_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK; @@ -2896,9 +2851,7 @@ d3d12_video_encoder_get_slice_bitstream_data(struct pipe_video_codec *codec, return; } - bool wait_res = d3d12_video_encoder_ensure_fence_finished(codec, - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[slice_idx].cmdqueue_fence, - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues[slice_idx], OS_TIMEOUT_INFINITE); + bool wait_res = d3d12_fence_finish(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[slice_idx].get(), OS_TIMEOUT_INFINITE); if (!wait_res) { debug_printf("Error: d3d12_video_encoder_get_slice_bitstream_data for Encode GPU command for fence %" PRIu64 " failed on d3d12_video_encoder_ensure_fence_finished\n", requested_metadata_fence); @@ -3256,10 +3209,12 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec, } } - ASSERTED bool success = d3d12_reset_fence(&pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData, pD3D12Enc->m_spFence.Get(), pD3D12Enc->m_fenceValue); - assert(success); + pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)]. 
+ m_CompletionFence.reset(d3d12_create_fence_raw(pD3D12Enc->m_spFence.Get(), pD3D12Enc->m_fenceValue)); - *feedback = (void*) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData; + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_fence.reset(d3d12_create_fence_raw(pD3D12Enc->m_spFence.Get(), pD3D12Enc->m_fenceValue)); + + *feedback = (void*)pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_fence.get(); std::vector rgCurrentFrameStateTransitions = { CD3DX12_RESOURCE_BARRIER::Transition(pInputVideoD3D12Res, @@ -3837,7 +3792,7 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec, pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionSizes.resize(num_slice_objects, {}); pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionOffsets.resize(num_slice_objects, {}); pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionFences.resize(num_slice_objects, NULL); - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences.resize(num_slice_objects, {}); + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences.resize(num_slice_objects); pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues.resize(num_slice_objects, pD3D12Enc->m_fenceValue); pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionSizes.resize(num_slice_objects, NULL); @@ -3896,14 +3851,11 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec, 0, sizeof(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i])); - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].value = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues[i]; - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].cmdqueue_fence = 
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionFences[i].Get(); - int event_fd = 0; - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].event = d3d12_fence_create_event(&event_fd); - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].cmdqueue_fence->SetEventOnCompletion(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].value, - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].event); + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].reset( + d3d12_create_fence_raw(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionFences[i].Get(), + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues[i])); - slice_fences[i] = (pipe_fence_handle*) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i]; + d3d12_fence_reference((struct d3d12_fence **)&slice_fences[i], pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].get()); } pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionBitstreamsBaseOffsets.resize(num_slice_objects, 0u); @@ -4259,8 +4211,9 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, *pMetadata = opt_metadata; return; } - - bool wait_res = d3d12_video_encoder_sync_completion(codec, feedback_fence->cmdqueue_fence, requested_metadata_fence, OS_TIMEOUT_INFINITE); + + + bool wait_res = d3d12_fence_finish(feedback_fence, OS_TIMEOUT_INFINITE); if (!wait_res) { opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed on d3d12_video_encoder_sync_completion\n", @@ -4740,7 +4693,8 @@ d3d12_video_encoder_end_frame(struct pipe_video_codec * codec, pD3D12Enc->m_bPendingWorkNotFlushed = true; size_t current_metadata_slot = 
d3d12_video_encoder_metadata_current_index(pD3D12Enc); - *picture->fence = (pipe_fence_handle *) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData; + if (picture->fence) + d3d12_fence_reference((struct d3d12_fence **)picture->fence, pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_fence.get()); return 0; } @@ -4883,8 +4837,12 @@ d3d12_video_encoder_fence_wait(struct pipe_video_codec *codec, struct d3d12_fence *fence = (struct d3d12_fence *) _fence; assert(fence); - bool wait_res = - d3d12_video_encoder_sync_completion(codec, fence->cmdqueue_fence, fence->value, timeout); + bool wait_res = d3d12_fence_finish(fence, timeout); + if (wait_res) { + // Opportunistically reset batches + for (uint32_t i = 0; i < D3D12_VIDEO_ENC_ASYNC_DEPTH; ++i) + d3d12_video_encoder_sync_completion(codec, i, 0); + } // Return semantics based on p_video_codec interface // ret == 0 -> Encode in progress diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc.h b/src/gallium/drivers/d3d12/d3d12_video_enc.h index cb6feeb0441..93e79a45ace 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc.h +++ b/src/gallium/drivers/d3d12/d3d12_video_enc.h @@ -99,7 +99,7 @@ d3d12_video_encoder_flush(struct pipe_video_codec *codec); * and releases the in-flight resources */ bool -d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, ID3D12Fence *fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns); +d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, size_t pool_index, uint64_t timeout_ns); /** * Get feedback fence. 
@@ -490,7 +490,7 @@ struct EncodedBitstreamResolvedMetadata std::vector ppResolvedSubregionSizes; std::vector ppResolvedSubregionOffsets; std::vector ppSubregionFences; - std::vector pSubregionPipeFences; + std::vector pSubregionPipeFences; std::vector pSubregionBitstreamsBaseOffsets; std::vector ppSubregionFenceValues; /* Slice headers written before each slices */ @@ -523,7 +523,7 @@ struct EncodedBitstreamResolvedMetadata uint64_t expected_max_slice_size = 0; /* Pending fence data for this frame */ - struct d3d12_fence m_FenceData; + d3d12_unique_fence m_fence; }; enum d3d12_video_encoder_driver_workarounds @@ -591,6 +591,7 @@ struct d3d12_video_encoder ComPtr m_spCommandAllocator; struct d3d12_fence* m_InputSurfaceFence = NULL; + d3d12_unique_fence m_CompletionFence; /* Stores encode result for submission error control in the D3D12_VIDEO_ENC_ASYNC_DEPTH slots */ enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK; diff --git a/src/gallium/drivers/d3d12/d3d12_video_proc.cpp b/src/gallium/drivers/d3d12/d3d12_video_proc.cpp index d8db3f736ad..cca7fb06be9 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_proc.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_proc.cpp @@ -179,10 +179,10 @@ d3d12_video_processor_end_frame(struct pipe_video_codec * codec, pD3D12Proc->m_spCommandList->ResourceBarrier(static_cast(barrier_transitions.size()), barrier_transitions.data()); - ASSERTED bool success = d3d12_reset_fence(&pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)], pD3D12Proc->m_spFence.Get(), pD3D12Proc->m_fenceValue); - assert(success); - - *picture->fence = (pipe_fence_handle*) &pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)]; + d3d12_unique_fence &fence = pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)]; + fence.reset(d3d12_create_fence_raw(pD3D12Proc->m_spFence.Get(), pD3D12Proc->m_fenceValue)); + if (picture->fence) 
+ d3d12_fence_reference((struct d3d12_fence **)picture->fence, fence.get()); return 0; } @@ -426,6 +426,7 @@ d3d12_video_processor_create(struct pipe_context *context, const struct pipe_vid pD3D12Proc->base.end_frame = d3d12_video_processor_end_frame; pD3D12Proc->base.flush = d3d12_video_processor_flush; pD3D12Proc->base.fence_wait = d3d12_video_processor_fence_wait; + pD3D12Proc->base.destroy_fence = d3d12_video_destroy_fence; /// diff --git a/src/gallium/drivers/d3d12/d3d12_video_proc.h b/src/gallium/drivers/d3d12/d3d12_video_proc.h index cb3a428c9bb..3aef3b1dff5 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_proc.h +++ b/src/gallium/drivers/d3d12/d3d12_video_proc.h @@ -110,7 +110,7 @@ struct d3d12_video_processor ComPtr m_spVideoProcessor; ComPtr m_spCommandQueue; std::vector> m_spCommandAllocators; - std::vector m_PendingFences; + std::vector m_PendingFences; ComPtr m_spCommandList; std::vector m_transitionsBeforeCloseCmdList;