d3d12: Fix video fence lifetime issues

pipe_fence_handle is a refcounted object, so it can't be owned by a container
which might have a different lifetime. It needs a dedicated heap allocation
so it can outlive its container.

Make sure that when we're handing out pipe_fence_handle references, we
add a ref to them before handing them out.

Instead of assuming that a fence_wait call is for the exact fence that we
returned from a given op, mirror what's done on graphics and
opportunistically scan the batches to see what's done, and reclaim
resources for them.

Use d3d12_fence helpers to replace a lot of duplicated code.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35900>
This commit is contained in:
Jesse Natalie 2025-07-02 14:35:49 -07:00 committed by Marge Bot
parent e9301b36fe
commit 1959a352ea
8 changed files with 153 additions and 225 deletions

View file

@ -40,27 +40,30 @@ destroy_fence(struct d3d12_fence *fence)
FREE(fence);
}
bool
d3d12_reset_fence(struct d3d12_fence *fence, ID3D12Fence *d3d12_fence_obj, uint64_t fence_value)
struct d3d12_fence *
d3d12_create_fence(struct d3d12_screen *screen, bool signal_new)
{
d3d12_fence_close_event(fence->event, fence->event_fd);
fence->cmdqueue_fence = d3d12_fence_obj;
fence->value = fence_value;
fence->event = d3d12_fence_create_event(&fence->event_fd);
fence->signaled = false;
struct d3d12_fence *ret =
d3d12_create_fence_raw(screen->fence, screen->fence_value + (signal_new ? 1 : 0));
if (!ret) {
debug_printf("CALLOC_STRUCT failed\n");
return NULL;
}
if (FAILED(fence->cmdqueue_fence->SetEventOnCompletion(fence->value, fence->event)))
goto fail;
return true;
if (signal_new) {
++screen->fence_value;
if (FAILED(screen->cmdqueue->Signal(screen->fence, ret->value)))
goto fail;
}
return ret;
fail:
d3d12_fence_close_event(fence->event, fence->event_fd);
return false;
destroy_fence(ret);
return NULL;
}
struct d3d12_fence *
d3d12_create_fence(struct d3d12_screen *screen, bool signal_new)
d3d12_create_fence_raw(ID3D12Fence *fence, uint64_t value)
{
struct d3d12_fence *ret = CALLOC_STRUCT(d3d12_fence);
if (!ret) {
@ -69,13 +72,12 @@ d3d12_create_fence(struct d3d12_screen *screen, bool signal_new)
}
ret->type = PIPE_FD_TYPE_NATIVE_SYNC;
uint64_t value = screen->fence_value;
if (signal_new) {
value = ++screen->fence_value;
if (FAILED(screen->cmdqueue->Signal(screen->fence, value)))
goto fail;
}
if(!d3d12_reset_fence(ret, screen->fence, value))
ret->cmdqueue_fence = fence;
ret->value = value;
ret->event = d3d12_fence_create_event(&ret->event_fd);
ret->signaled = false;
if (FAILED(fence->SetEventOnCompletion(value, ret->event)))
goto fail;
pipe_reference_init(&ret->reference, 1);
@ -124,6 +126,12 @@ d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence)
*ptr = fence;
}
/* destroy_fence hook shared by the d3d12 video codecs.
 *
 * Hands the caller's fence to d3d12_fence_reference() with a null
 * replacement, i.e. gives up the reference the caller was holding.
 * The codec argument is not used.
 */
void
d3d12_video_destroy_fence(struct pipe_video_codec *codec, struct pipe_fence_handle *fence)
{
   (void)codec;

   /* Work on a typed local copy; d3d12_fence_reference() overwrites the
    * pointer it is given with the new (null) value. */
   struct d3d12_fence *released = (struct d3d12_fence *)fence;
   d3d12_fence_reference(&released, nullptr);
}
static void
fence_reference(struct pipe_screen *pscreen,
struct pipe_fence_handle **pptr,

View file

@ -101,22 +101,37 @@ d3d12_fence(struct pipe_fence_handle *pfence)
return (struct d3d12_fence *)pfence;
}
bool
d3d12_reset_fence(struct d3d12_fence *fence, ID3D12Fence *d3d12_fence_obj, uint64_t fence_value);
struct d3d12_fence *
d3d12_create_fence(struct d3d12_screen *screen, bool signal_new);
struct d3d12_fence *
d3d12_create_fence_raw(ID3D12Fence *d3d12_fence_obj, uint64_t fence_value);
struct d3d12_fence *
d3d12_open_fence(struct d3d12_screen *screen, HANDLE handle, const void *name, pipe_fd_type type);
void
d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence);
void
d3d12_video_destroy_fence(struct pipe_video_codec *codec, struct pipe_fence_handle *fence);
bool
d3d12_fence_finish(struct d3d12_fence *fence, uint64_t timeout_ns);
void
d3d12_screen_fence_init(struct pipe_screen *pscreen);
#if defined(__cplusplus)
#include <memory>
/* Deleter functor for smart pointers that own a d3d12_fence reference:
 * instead of deleting the object, it passes the pointer to
 * d3d12_fence_reference() with a null replacement. */
struct d3d12_fence_deleter
{
   void operator()(struct d3d12_fence *owned_fence)
   {
      d3d12_fence_reference(&owned_fence, nullptr);
   }
};
using d3d12_unique_fence = std::unique_ptr<struct d3d12_fence, d3d12_fence_deleter>;
#endif
#endif

View file

@ -65,7 +65,7 @@ d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video
// Not using new doesn't call ctor and the initializations in the class declaration are lost
struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;
pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH, { 0 });
pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH);
pD3D12Dec->base = *codec;
pD3D12Dec->m_screen = context->screen;
@ -81,6 +81,7 @@ d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video
pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame;
pD3D12Dec->base.flush = d3d12_video_decoder_flush;
pD3D12Dec->base.fence_wait = d3d12_video_decoder_fence_wait;
pD3D12Dec->base.destroy_fence = d3d12_video_destroy_fence;
pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile);
pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile);
@ -157,8 +158,7 @@ d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
// Flush and wait for completion of any in-flight GPU work before destroying objects
d3d12_video_decoder_flush(codec);
if (pD3D12Dec->m_fenceValue > 1 /* Check we submitted at least one frame */) {
auto decode_queue_completion_fence = pD3D12Dec->m_inflightResourcesPool[(pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_FenceData;
d3d12_video_decoder_sync_completion(codec, decode_queue_completion_fence.cmdqueue_fence, decode_queue_completion_fence.value, OS_TIMEOUT_INFINITE);
d3d12_video_decoder_sync_completion(codec, (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH, OS_TIMEOUT_INFINITE);
struct pipe_fence_handle *context_queue_completion_fence = NULL;
pD3D12Dec->base.context->flush(pD3D12Dec->base.context, &context_queue_completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
pD3D12Dec->m_pD3D12Screen->base.fence_finish(&pD3D12Dec->m_pD3D12Screen->base, NULL, context_queue_completion_fence, OS_TIMEOUT_INFINITE);
@ -201,17 +201,13 @@ d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
///
/// Wait here to make sure the next in flight resource set is empty before using it
///
uint64_t fenceValueToWaitOn = static_cast<uint64_t>(
std::max(static_cast<int64_t>(0l),
static_cast<int64_t>(pD3D12Dec->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_DEC_ASYNC_DEPTH)));
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource "
"sets with previous work with fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
ASSERTED bool wait_res =
d3d12_video_decoder_sync_completion(codec, pD3D12Dec->m_spFence.Get(), fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
assert(wait_res);
if (pD3D12Dec->m_fenceValue >= D3D12_VIDEO_DEC_ASYNC_DEPTH) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource "
"sets with previous work\n");
ASSERTED bool wait_res =
d3d12_video_decoder_sync_completion(codec, pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH, OS_TIMEOUT_INFINITE);
assert(wait_res);
}
HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset(
pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get());
@ -679,7 +675,8 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
// No need to copy, the output surface fence is merely the decode queue fence
*picture->fence = (pipe_fence_handle *) &pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData;
if (picture->fence)
d3d12_fence_reference((struct d3d12_fence **)picture->fence, pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_fence.get());
} else {
///
/// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
@ -698,9 +695,8 @@ d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
assert(pPipeSrc);
// GPU wait on the graphics context which will do the copy until the decode finishes
pD3D12Screen->cmdqueue->Wait(
pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.cmdqueue_fence,
pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.value);
pD3D12Dec->base.context->fence_server_sync(pD3D12Dec->base.context,
(struct pipe_fence_handle *)pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_fence.get());
// Copy all format subresources/texture planes
for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
@ -740,8 +736,12 @@ d3d12_video_decoder_fence_wait(struct pipe_video_codec *codec, struct pipe_fence
struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
assert(fenceValueToWaitOn);
ASSERTED bool wait_res =
d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn->cmdqueue_fence, fenceValueToWaitOn->value, timeout);
bool wait_res = d3d12_fence_finish(fenceValueToWaitOn, timeout);
if (wait_res) {
// Opportunistically reset batches
for (uint32_t i = 0; i < D3D12_VIDEO_DEC_ASYNC_DEPTH; ++i)
(void)d3d12_video_decoder_sync_completion(codec, i, 0);
}
// Return semantics based on p_video_codec interface
// ret == 0 -> Decode in progress
@ -806,10 +806,8 @@ d3d12_video_decoder_flush(struct pipe_video_codec *codec)
}
// Set async fence info
ASSERTED bool success = d3d12_reset_fence(&inFlightResources.m_FenceData, pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);
assert(success);
inFlightResources.m_fence.reset(d3d12_create_fence_raw(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue++));
pD3D12Dec->m_fenceValue++;
pD3D12Dec->m_needsGPUFlush = false;
}
return;
@ -843,6 +841,7 @@ d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Scre
return false;
}
uint64_t CompletionFenceValue = pD3D12Dec->m_fenceValue;
for (auto &inputResource : pD3D12Dec->m_inflightResourcesPool) {
hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator(
D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
@ -853,6 +852,9 @@ d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Scre
hr);
return false;
}
// Initialize fence for the in flight resource pool slot
inputResource.m_fence.reset(d3d12_create_fence_raw(pD3D12Dec->m_spFence.Get(), CompletionFenceValue++));
}
ComPtr<ID3D12Device4> spD3D12Device4;
@ -1620,59 +1622,9 @@ d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType,
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
bool
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec,
ID3D12Fence *fence,
uint64_t fenceValueToWaitOn,
uint64_t timeout_ns)
{
bool wait_result = true;
HRESULT hr = S_OK;
uint64_t completedValue = fence->GetCompletedValue();
debug_printf(
"[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64
") to finish with "
"fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
timeout_ns,
fenceValueToWaitOn,
completedValue);
if (completedValue < fenceValueToWaitOn) {
HANDLE event = {};
int event_fd = 0;
event = d3d12_fence_create_event(&event_fd);
hr = fence->SetEventOnCompletion(fenceValueToWaitOn, event);
if (FAILED(hr)) {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - SetEventOnCompletion for "
"fenceValue %" PRIu64 " failed with HR %x\n",
fenceValueToWaitOn,
hr);
return false;
}
wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
d3d12_fence_close_event(event, event_fd);
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting on fence to be done with "
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
fenceValueToWaitOn,
completedValue);
} else {
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Fence already done with "
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
fenceValueToWaitOn,
completedValue);
}
return wait_result;
}
bool
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec,
ID3D12Fence *fence,
uint64_t fenceValueToWaitOn,
uint32_t frame_index,
uint64_t timeout_ns)
{
struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
@ -1681,18 +1633,16 @@ d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec,
assert(pD3D12Dec->m_spDecodeCommandQueue);
HRESULT hr = S_OK;
ASSERTED bool wait_result = d3d12_video_decoder_ensure_fence_finished(codec, fence, fenceValueToWaitOn, timeout_ns);
assert(wait_result);
auto &pool_entry = pD3D12Dec->m_inflightResourcesPool[frame_index];
if (!d3d12_fence_finish(pool_entry.m_fence.get(), timeout_ns))
return false;
// Release references granted on end_frame for this inflight operations
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoder.Reset();
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoderHeap.Reset();
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_References.reset();
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_stagingDecodeBitstream.resize(
0);
pipe_resource_reference(
&pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].pPipeCompressedBufferObj,
NULL);
pool_entry.m_spDecoder.Reset();
pool_entry.m_spDecoderHeap.Reset();
pool_entry.m_References.reset();
pool_entry.m_stagingDecodeBitstream.clear();
pipe_resource_reference(&pool_entry.pPipeCompressedBufferObj, NULL);
struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
assert(pD3D12Screen);
@ -1703,8 +1653,7 @@ d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec,
.m_pBitstreamUploadGPUCompletionFence,
NULL);
hr =
pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spCommandAllocator->Reset();
hr = pool_entry.m_spCommandAllocator->Reset();
if (FAILED(hr)) {
debug_printf("failed with %x.\n", hr);
goto sync_with_token_fail;
@ -1721,15 +1670,14 @@ d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec,
}
debug_printf(
"[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64
"\n",
fenceValueToWaitOn);
"[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for frame index: %u\n",
frame_index);
return wait_result;
return true;
sync_with_token_fail:
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for frame index: %u\n",
frame_index);
assert(false);
return false;
}
}

View file

@ -148,7 +148,7 @@ struct d3d12_video_decoder
{
struct pipe_fence_handle *m_pBitstreamUploadGPUCompletionFence;
struct d3d12_fence m_FenceData;
d3d12_unique_fence m_fence;
// In case of reconfigurations that trigger creation of new
// decoder or decoderheap or reference frames allocations
@ -276,10 +276,7 @@ void
d3d12_video_decoder_prepare_dxva_slices_control(struct d3d12_video_decoder *pD3D12Dec, struct pipe_picture_desc *picture);
bool
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec, ID3D12Fence* fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
bool
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, ID3D12Fence* fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, uint32_t frame_index, uint64_t timeout_ns);
///
/// d3d12_video_decoder functions ends

View file

@ -179,59 +179,9 @@ flush_fail:
assert(false);
}
bool
d3d12_video_encoder_ensure_fence_finished(struct pipe_video_codec *codec, ID3D12Fence *fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns)
{
bool wait_result = true;
struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec;
HRESULT hr = S_OK;
uint64_t completedValue = fence->GetCompletedValue();
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64 ") to finish with "
"fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
timeout_ns, fenceValueToWaitOn, completedValue);
if(completedValue < fenceValueToWaitOn) {
HANDLE event = { };
int event_fd = 0;
event = d3d12_fence_create_event(&event_fd);
hr = fence->SetEventOnCompletion(fenceValueToWaitOn, event);
if (FAILED(hr)) {
debug_printf(
"[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - SetEventOnCompletion for fenceValue %" PRIu64 " failed with HR %x\n",
fenceValueToWaitOn, hr);
goto ensure_fence_finished_fail;
}
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Waiting on fence to be done with "
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
fenceValueToWaitOn,
completedValue);
wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
d3d12_fence_close_event(event, event_fd);
} else {
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Fence already done with "
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
fenceValueToWaitOn,
completedValue);
}
return wait_result;
ensure_fence_finished_fail:
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
return false;
}
bool
d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec,
ID3D12Fence *fence,
uint64_t fenceValueToWaitOn,
size_t pool_index,
uint64_t timeout_ns)
{
struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec;
@ -240,22 +190,23 @@ d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec,
assert(pD3D12Enc->m_spEncodeCommandQueue);
HRESULT hr = S_OK;
bool wait_result = d3d12_video_encoder_ensure_fence_finished(codec, fence, fenceValueToWaitOn, timeout_ns);
assert(wait_result);
auto &pool_entry = pD3D12Enc->m_inflightResourcesPool[pool_index];
if (!d3d12_fence_finish(pool_entry.m_CompletionFence.get(), timeout_ns))
return false;
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion - resetting ID3D12CommandAllocator %p...\n",
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spCommandAllocator.Get());
hr = pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spCommandAllocator->Reset();
pool_entry.m_spCommandAllocator.Get());
hr = pool_entry.m_spCommandAllocator->Reset();
if(FAILED(hr)) {
debug_printf("failed with %x.\n", hr);
goto sync_with_token_fail;
}
// Release references granted on end_frame for this inflight operations
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spEncoder.Reset();
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_spEncoderHeap.Reset();
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_References.reset();
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].m_InputSurfaceFence = NULL;
pool_entry.m_spEncoder.Reset();
pool_entry.m_spEncoderHeap.Reset();
pool_entry.m_References.reset();
pool_entry.m_InputSurfaceFence = NULL;
// Validate device was not removed
hr = pD3D12Enc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
@ -268,15 +219,14 @@ d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec,
}
debug_printf(
"[d3d12_video_encoder] d3d12_video_encoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
"[d3d12_video_encoder] d3d12_video_encoder_sync_completion - GPU execution finalized for pool index: %" PRIu64 "\n",
(uint64_t)pool_index);
return wait_result;
return true;
sync_with_token_fail:
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for pool index: %" PRIu64 "\n", (uint64_t)pool_index);
pool_entry.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
return false;
}
@ -298,9 +248,9 @@ d3d12_video_encoder_destroy(struct pipe_video_codec *codec)
// Flush pending work before destroying
if(pD3D12Enc->m_bPendingWorkNotFlushed){
uint64_t curBatchFence = pD3D12Enc->m_fenceValue;
size_t pool_index = d3d12_video_encoder_pool_current_index(pD3D12Enc);
d3d12_video_encoder_flush(codec);
d3d12_video_encoder_sync_completion(codec, pD3D12Enc->m_spFence.Get(), curBatchFence, OS_TIMEOUT_INFINITE);
d3d12_video_encoder_sync_completion(codec, pool_index, OS_TIMEOUT_INFINITE);
}
if (pD3D12Enc->m_SliceHeaderRepackBuffer)
@ -2387,6 +2337,7 @@ d3d12_video_encoder_create_command_objects(struct d3d12_video_encoder *pD3D12Enc
return false;
}
uint64_t CompletionFenceValue = pD3D12Enc->m_fenceValue;
for (auto& inputResource : pD3D12Enc->m_inflightResourcesPool)
{
// Create associated command allocator for Encode, Resolve operations
@ -2399,6 +2350,9 @@ d3d12_video_encoder_create_command_objects(struct d3d12_video_encoder *pD3D12Enc
hr);
return false;
}
// Initialize fence for the in flight resource pool slot
inputResource.m_CompletionFence.reset(d3d12_create_fence_raw(pD3D12Enc->m_spFence.Get(), CompletionFenceValue++));
}
ComPtr<ID3D12Device4> spD3D12Device4;
@ -2434,8 +2388,8 @@ d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pi
// Not using new doesn't call ctor and the initializations in the class declaration are lost
struct d3d12_video_encoder *pD3D12Enc = new d3d12_video_encoder;
pD3D12Enc->m_spEncodedFrameMetadata.resize(D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT, {nullptr, 0, 0});
pD3D12Enc->m_inflightResourcesPool.resize(D3D12_VIDEO_ENC_ASYNC_DEPTH, { 0 });
pD3D12Enc->m_spEncodedFrameMetadata.resize(D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
pD3D12Enc->m_inflightResourcesPool.resize(D3D12_VIDEO_ENC_ASYNC_DEPTH);
pD3D12Enc->base = *codec;
pD3D12Enc->m_screen = context->screen;
@ -2453,6 +2407,7 @@ d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pi
pD3D12Enc->base.get_feedback = d3d12_video_encoder_get_feedback;
pD3D12Enc->base.create_dpb_buffer = d3d12_video_create_dpb_buffer;
pD3D12Enc->base.fence_wait = d3d12_video_encoder_fence_wait;
pD3D12Enc->base.destroy_fence = d3d12_video_destroy_fence;
pD3D12Enc->base.encode_bitstream_sliced = d3d12_video_encoder_encode_bitstream_sliced;
pD3D12Enc->base.get_slice_bitstream_data = d3d12_video_encoder_get_slice_bitstream_data;
@ -2750,12 +2705,12 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec,
///
/// Wait here to make sure the next in flight resource set is empty before using it
///
uint64_t fenceValueToWaitOn = static_cast<uint64_t>(std::max(static_cast<int64_t>(0l), static_cast<int64_t>(pD3D12Enc->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_ENC_ASYNC_DEPTH) ));
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame Waiting for completion of in flight resource sets with previous work with fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
d3d12_video_encoder_ensure_fence_finished(codec, pD3D12Enc->m_spFence.Get(), fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
if (pD3D12Enc->m_fenceValue >= D3D12_VIDEO_ENC_ASYNC_DEPTH) {
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame Waiting for completion of in flight resource sets with previous work for pool index:"
"%" PRIu64 "\n",
(uint64_t)d3d12_video_encoder_pool_current_index(pD3D12Enc));
d3d12_fence_finish(pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_CompletionFence.get(), OS_TIMEOUT_INFINITE);
}
if (!d3d12_video_encoder_reconfigure_session(pD3D12Enc, target, picture)) {
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame - Failure on "
@ -2771,7 +2726,7 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec,
goto fail;
}
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_InputSurfaceFence = (struct d3d12_fence*) *picture->fence;
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_InputSurfaceFence = picture->fence ? d3d12_fence(*picture->fence) : nullptr;
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
pD3D12Enc->m_spEncodedFrameMetadata[d3d12_video_encoder_metadata_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
@ -2896,9 +2851,7 @@ d3d12_video_encoder_get_slice_bitstream_data(struct pipe_video_codec *codec,
return;
}
bool wait_res = d3d12_video_encoder_ensure_fence_finished(codec,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[slice_idx].cmdqueue_fence,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues[slice_idx], OS_TIMEOUT_INFINITE);
bool wait_res = d3d12_fence_finish(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[slice_idx].get(), OS_TIMEOUT_INFINITE);
if (!wait_res) {
debug_printf("Error: d3d12_video_encoder_get_slice_bitstream_data for Encode GPU command for fence %" PRIu64 " failed on d3d12_video_encoder_ensure_fence_finished\n",
requested_metadata_fence);
@ -3256,10 +3209,12 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec,
}
}
ASSERTED bool success = d3d12_reset_fence(&pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData, pD3D12Enc->m_spFence.Get(), pD3D12Enc->m_fenceValue);
assert(success);
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].
m_CompletionFence.reset(d3d12_create_fence_raw(pD3D12Enc->m_spFence.Get(), pD3D12Enc->m_fenceValue));
*feedback = (void*) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_fence.reset(d3d12_create_fence_raw(pD3D12Enc->m_spFence.Get(), pD3D12Enc->m_fenceValue));
*feedback = (void*)pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_fence.get();
std::vector<D3D12_RESOURCE_BARRIER> rgCurrentFrameStateTransitions = {
CD3DX12_RESOURCE_BARRIER::Transition(pInputVideoD3D12Res,
@ -3837,7 +3792,7 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionSizes.resize(num_slice_objects, {});
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionOffsets.resize(num_slice_objects, {});
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionFences.resize(num_slice_objects, NULL);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences.resize(num_slice_objects, {});
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences.resize(num_slice_objects);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues.resize(num_slice_objects, pD3D12Enc->m_fenceValue);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionSizes.resize(num_slice_objects, NULL);
@ -3896,14 +3851,11 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec,
0,
sizeof(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i]));
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].value = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues[i];
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].cmdqueue_fence = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionFences[i].Get();
int event_fd = 0;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].event = d3d12_fence_create_event(&event_fd);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].cmdqueue_fence->SetEventOnCompletion(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].value,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].event);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].reset(
d3d12_create_fence_raw(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pspSubregionFences[i].Get(),
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppSubregionFenceValues[i]));
slice_fences[i] = (pipe_fence_handle*) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i];
d3d12_fence_reference((struct d3d12_fence **)&slice_fences[i], pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionPipeFences[i].get());
}
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pSubregionBitstreamsBaseOffsets.resize(num_slice_objects, 0u);
@ -4259,8 +4211,9 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec,
*pMetadata = opt_metadata;
return;
}
bool wait_res = d3d12_video_encoder_sync_completion(codec, feedback_fence->cmdqueue_fence, requested_metadata_fence, OS_TIMEOUT_INFINITE);
bool wait_res = d3d12_fence_finish(feedback_fence, OS_TIMEOUT_INFINITE);
if (!wait_res) {
opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed on d3d12_video_encoder_sync_completion\n",
@ -4740,7 +4693,8 @@ d3d12_video_encoder_end_frame(struct pipe_video_codec * codec,
pD3D12Enc->m_bPendingWorkNotFlushed = true;
size_t current_metadata_slot = d3d12_video_encoder_metadata_current_index(pD3D12Enc);
*picture->fence = (pipe_fence_handle *) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData;
if (picture->fence)
d3d12_fence_reference((struct d3d12_fence **)picture->fence, pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_fence.get());
return 0;
}
@ -4883,8 +4837,12 @@ d3d12_video_encoder_fence_wait(struct pipe_video_codec *codec,
struct d3d12_fence *fence = (struct d3d12_fence *) _fence;
assert(fence);
bool wait_res =
d3d12_video_encoder_sync_completion(codec, fence->cmdqueue_fence, fence->value, timeout);
bool wait_res = d3d12_fence_finish(fence, timeout);
if (wait_res) {
// Opportunistically reset batches
for (uint32_t i = 0; i < D3D12_VIDEO_ENC_ASYNC_DEPTH; ++i)
d3d12_video_encoder_sync_completion(codec, i, 0);
}
// Return semantics based on p_video_codec interface
// ret == 0 -> Encode in progress

View file

@ -99,7 +99,7 @@ d3d12_video_encoder_flush(struct pipe_video_codec *codec);
* and releases the in-flight resources
*/
bool
d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, ID3D12Fence *fence, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, size_t pool_index, uint64_t timeout_ns);
/**
* Get feedback fence.
@ -490,7 +490,7 @@ struct EncodedBitstreamResolvedMetadata
std::vector<UINT64> ppResolvedSubregionSizes;
std::vector<UINT64> ppResolvedSubregionOffsets;
std::vector<ID3D12Fence*> ppSubregionFences;
std::vector<struct d3d12_fence> pSubregionPipeFences;
std::vector<d3d12_unique_fence> pSubregionPipeFences;
std::vector<UINT64> pSubregionBitstreamsBaseOffsets;
std::vector<UINT64> ppSubregionFenceValues;
/* Slice headers written before each slices */
@ -523,7 +523,7 @@ struct EncodedBitstreamResolvedMetadata
uint64_t expected_max_slice_size = 0;
/* Pending fence data for this frame */
struct d3d12_fence m_FenceData;
d3d12_unique_fence m_fence;
};
enum d3d12_video_encoder_driver_workarounds
@ -591,6 +591,7 @@ struct d3d12_video_encoder
ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;
struct d3d12_fence* m_InputSurfaceFence = NULL;
d3d12_unique_fence m_CompletionFence;
/* Stores encode result for submission error control in the D3D12_VIDEO_ENC_ASYNC_DEPTH slots */
enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;

View file

@ -179,10 +179,10 @@ d3d12_video_processor_end_frame(struct pipe_video_codec * codec,
pD3D12Proc->m_spCommandList->ResourceBarrier(static_cast<uint32_t>(barrier_transitions.size()), barrier_transitions.data());
ASSERTED bool success = d3d12_reset_fence(&pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)], pD3D12Proc->m_spFence.Get(), pD3D12Proc->m_fenceValue);
assert(success);
*picture->fence = (pipe_fence_handle*) &pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)];
d3d12_unique_fence &fence = pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)];
fence.reset(d3d12_create_fence_raw(pD3D12Proc->m_spFence.Get(), pD3D12Proc->m_fenceValue));
if (picture->fence)
d3d12_fence_reference((struct d3d12_fence **)picture->fence, fence.get());
return 0;
}
@ -426,6 +426,7 @@ d3d12_video_processor_create(struct pipe_context *context, const struct pipe_vid
pD3D12Proc->base.end_frame = d3d12_video_processor_end_frame;
pD3D12Proc->base.flush = d3d12_video_processor_flush;
pD3D12Proc->base.fence_wait = d3d12_video_processor_fence_wait;
pD3D12Proc->base.destroy_fence = d3d12_video_destroy_fence;
///

View file

@ -110,7 +110,7 @@ struct d3d12_video_processor
ComPtr<ID3D12VideoProcessor1> m_spVideoProcessor;
ComPtr<ID3D12CommandQueue> m_spCommandQueue;
std::vector<ComPtr<ID3D12CommandAllocator>> m_spCommandAllocators;
std::vector<struct d3d12_fence> m_PendingFences;
std::vector<d3d12_unique_fence> m_PendingFences;
ComPtr<ID3D12VideoProcessCommandList1> m_spCommandList;
std::vector<D3D12_RESOURCE_BARRIER> m_transitionsBeforeCloseCmdList;