d3d12: Allow frontends to set_video_encoder_max_async_queue_depth() to manage encoder memory overhead

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37565>
This commit is contained in:
Silvio Vilerino 2025-09-24 16:26:25 -04:00 committed by Marge Bot
parent b2b009fc29
commit b8f2c41581
11 changed files with 73 additions and 31 deletions

View file

@ -303,6 +303,8 @@ struct d3d12_context {
struct d3d12_context_queue_priority_manager* priority_manager; // Object passed and managed by frontend
mtx_t priority_manager_lock; // Mutex to protect access to priority_manager
#endif // ( USE_D3D12_PREVIEW_HEADERS && ( D3D12_PREVIEW_SDK_VERSION >= 717 ) )
uint32_t max_video_encoding_async_depth = 0u;
};
static inline struct d3d12_context *
@ -335,6 +337,9 @@ int
d3d12_context_set_queue_priority_manager(struct pipe_context *ctx, struct d3d12_context_queue_priority_manager *priority_manager);
#endif // ( USE_D3D12_PREVIEW_HEADERS && ( D3D12_PREVIEW_SDK_VERSION >= 717 ) )
int
d3d12_video_encoder_set_max_async_queue_depth(struct pipe_context *ctx, uint32_t max_async_depth);
bool
d3d12_enable_fake_so_buffers(struct d3d12_context *ctx, unsigned factor);

View file

@ -481,6 +481,19 @@ d3d12_context_set_queue_priority_manager(struct pipe_context *ctx, struct d3d12_
#endif // ( USE_D3D12_PREVIEW_HEADERS && ( D3D12_PREVIEW_SDK_VERSION >= 717 ) )
int
d3d12_video_encoder_set_max_async_queue_depth(struct pipe_context *ctx, uint32_t max_async_depth)
{
   /* Configure the maximum number of in-flight encode frames for video
    * codecs created AFTER this call on this context; previously created
    * codecs are unaffected.
    *
    * Parameters:
    *  - ctx: the pipe_context to configure
    *  - max_async_depth: requested queue depth, valid range [1, 8]
    *
    * Returns 0 on success, -1 on an out-of-range depth.
    */

   /* Reject 0 as well as values above 8: the depth is later used as a
    * ring-buffer modulus (m_fenceValue % m_MaxQueueAsyncDepth) and as a
    * pool size, so a zero depth would trip the encoder's assert and
    * divide by zero. The previous check (> 8 only) let 0 through even
    * though the error message already promised a [1, 8] range. */
   if (max_async_depth < 1u || max_async_depth > 8u) {
      debug_printf("d3d12_video_encoder_set_max_async_queue_depth: max_async_depth must be between 1 and 8\n");
      return -1;
   }

   struct d3d12_context *d3d12_ctx = d3d12_context(ctx);
   d3d12_ctx->max_video_encoding_async_depth = max_async_depth;
   return 0;
}
struct pipe_context *
d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
@ -513,6 +526,7 @@ d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
if (!ctx)
return NULL;
ctx->max_video_encoding_async_depth = static_cast<uint32_t>(debug_get_num_option("D3D12_VIDEO_ENC_ASYNC_DEPTH", 8));
ctx->base.screen = pscreen;
ctx->base.priv = priv;

View file

@ -171,6 +171,26 @@ struct d3d12_interop_device_info1 {
* Returns int (0 for success, error code otherwise)
*/
int (*set_context_queue_priority_manager)(struct pipe_context *context, struct d3d12_context_queue_priority_manager *manager);
/*
* Function pointer to set the maximum queue async depth for video encode work queues.
* If this function is NULL, the driver does not support setting max queue depth.
Some frontends have modes where they limit the number of frames in flight,
and this function allows the frontend to communicate that to the driver.
* That way the driver can allocate less command allocators and resources for
* video in flight frames and reduce memory usage.
*
* A call to this function alters the behavior of pipe_context::create_video_codec
* and any video codec created AFTER a call to this function will have the specified
max async queue depth. Video codecs created before this function is called are not affected.
*
* Parameters:
* - pipe_context*: context to configure
* - unsigned int: maximum queue depth to set
*
* Returns int (0 for success, error code otherwise)
*/
int (*set_video_encoder_max_async_queue_depth)(struct pipe_context *context, uint32_t max_async_queue_depth);
};
#endif // ( USE_D3D12_PREVIEW_HEADERS && ( D3D12_PREVIEW_SDK_VERSION >= 717 ) )

View file

@ -1162,6 +1162,7 @@ d3d12_interop_query_device_info(struct pipe_screen *pscreen, uint32_t data_size,
if (data_size >= sizeof(d3d12_interop_device_info1)) {
d3d12_interop_device_info1 *info1 = (d3d12_interop_device_info1 *)data;
info1->set_context_queue_priority_manager = d3d12_context_set_queue_priority_manager;
info1->set_video_encoder_max_async_queue_depth = d3d12_video_encoder_set_max_async_queue_depth;
return sizeof(*info1);
}
#endif // ( USE_D3D12_PREVIEW_HEADERS && ( D3D12_PREVIEW_SDK_VERSION >= 717 ) )

View file

@ -509,7 +509,7 @@ d3d12_video_create_dpb_buffer_texarray(struct pipe_video_codec *codec,
if (!pD3D12Enc->m_pVideoTexArrayDPBPool)
{
pipe_resource resource_creation_info = {};
resource_creation_info.array_size = static_cast<uint16_t>(d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc) + D3D12_VIDEO_ENC_ASYNC_DEPTH + 1u);
resource_creation_info.array_size = static_cast<uint16_t>(d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc) + pD3D12Enc->m_MaxQueueAsyncDepth + 1u);
assert(resource_creation_info.array_size <= 32); // uint32_t used as a usage bitmap into m_pVideoTexArrayDPBPool
buf = (d3d12_video_buffer*) d3d12_video_buffer_create_impl(codec->context, templat, &resource_creation_info, d3d12_video_buffer_creation_mode::create_resource, NULL, 0);
pD3D12Enc->m_pVideoTexArrayDPBPool = &buf->texture->base.b;

View file

@ -88,13 +88,13 @@ d3d12_video_encoder_convert_codec_to_d3d12_enc_codec(enum pipe_video_profile pro
size_t
d3d12_video_encoder_pool_current_index(struct d3d12_video_encoder *pD3D12Enc)
{
return static_cast<size_t>(pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_ASYNC_DEPTH);
return static_cast<size_t>(pD3D12Enc->m_fenceValue % pD3D12Enc->m_MaxQueueAsyncDepth);
}
size_t
d3d12_video_encoder_metadata_current_index(struct d3d12_video_encoder *pD3D12Enc)
{
return static_cast<size_t>(pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
return static_cast<size_t>(pD3D12Enc->m_fenceValue % pD3D12Enc->m_MaxMetadataBuffersCount);
}
void
@ -2454,6 +2454,8 @@ UINT d3d12_video_encoder_calculate_max_output_compressed_bitstream_size(
struct pipe_video_codec *
d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pipe_video_codec *codec)
{
struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
///
/// Initialize d3d12_video_encoder
///
@ -2461,8 +2463,12 @@ d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pi
// Allocate with 'new' so the constructor runs; otherwise the in-class member initializers would be lost
struct d3d12_video_encoder *pD3D12Enc = new d3d12_video_encoder;
pD3D12Enc->m_spEncodedFrameMetadata.resize(D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
pD3D12Enc->m_inflightResourcesPool.resize(D3D12_VIDEO_ENC_ASYNC_DEPTH);
pD3D12Enc->m_MaxQueueAsyncDepth = pD3D12Ctx->max_video_encoding_async_depth;
assert(pD3D12Enc->m_MaxQueueAsyncDepth > 0);
pD3D12Enc->m_MaxMetadataBuffersCount = 2 * pD3D12Enc->m_MaxQueueAsyncDepth;
pD3D12Enc->m_spEncodedFrameMetadata.resize(pD3D12Enc->m_MaxMetadataBuffersCount);
pD3D12Enc->m_inflightResourcesPool.resize(pD3D12Enc->m_MaxQueueAsyncDepth);
pD3D12Enc->base = *codec;
pD3D12Enc->m_screen = context->screen;
@ -2484,7 +2490,6 @@ d3d12_video_encoder_create_encoder(struct pipe_context *context, const struct pi
pD3D12Enc->base.encode_bitstream_sliced = d3d12_video_encoder_encode_bitstream_sliced;
pD3D12Enc->base.get_slice_bitstream_data = d3d12_video_encoder_get_slice_bitstream_data;
struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
pD3D12Enc->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen);
if (FAILED(pD3D12Enc->m_pD3D12Screen->dev->QueryInterface(
@ -2807,7 +2812,7 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec,
///
/// Wait here to make sure the next in flight resource set is empty before using it
///
if (pD3D12Enc->m_fenceValue >= D3D12_VIDEO_ENC_ASYNC_DEPTH) {
if (pD3D12Enc->m_fenceValue >= pD3D12Enc->m_MaxQueueAsyncDepth) {
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame Waiting for completion of in flight resource sets with previous work for pool index:"
"%" PRIu64 "\n",
(uint64_t)d3d12_video_encoder_pool_current_index(pD3D12Enc));
@ -2939,7 +2944,7 @@ d3d12_video_encoder_get_slice_bitstream_data(struct pipe_video_codec *codec,
//
// Only resolve them once and cache them for future calls
//
size_t current_metadata_slot = (requested_metadata_fence % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
size_t current_metadata_slot = (requested_metadata_fence % pD3D12Enc->m_MaxMetadataBuffersCount);
if (pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].ppResolvedSubregionSizes[slice_idx] == 0)
{
@ -2964,14 +2969,14 @@ d3d12_video_encoder_get_slice_bitstream_data(struct pipe_video_codec *codec,
return;
}
if((pD3D12Enc->m_fenceValue - requested_metadata_fence) > D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT)
if((pD3D12Enc->m_fenceValue - requested_metadata_fence) > pD3D12Enc->m_MaxMetadataBuffersCount)
{
debug_printf("[d3d12_video_encoder_get_slice_bitstream_data] Requested metadata for fence %" PRIu64 " at current fence %" PRIu64
" is too far back in time for the ring buffer of size %" PRIu64 " we keep track off - "
" Please increase the D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT environment variable and try again.\n",
" Please increase the m_MaxMetadataBuffersCount of the encoder and try again.\n",
requested_metadata_fence,
pD3D12Enc->m_fenceValue,
static_cast<uint64_t>(D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT));
static_cast<uint64_t>(pD3D12Enc->m_MaxMetadataBuffersCount));
if (codec_unit_metadata_count)
*codec_unit_metadata_count = 0u;
assert(false);
@ -3173,7 +3178,7 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec,
/* Warning if the previous finished async execution stored was read not by get_feedback()
before overwriting. This should be handled correctly by the app by calling vaSyncBuffer/vaSyncSurface
without having the async depth going beyond D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT frames without syncing */
without having the async depth going beyond pD3D12Enc->m_MaxMetadataBuffersCount frames without syncing */
if(!pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bRead) {
debug_printf("WARNING: [d3d12_video_encoder] d3d12_video_encoder_encode_bitstream - overwriting metadata slot %" PRIu64 " before calling get_feedback", static_cast<uint64_t>(current_metadata_slot));
assert(false);
@ -3885,7 +3890,7 @@ d3d12_video_encoder_encode_bitstream_impl(struct pipe_video_codec *codec,
debug_printf("User requested sliced encoding, but there is no HW support for it (PIPE_VIDEO_CAP_ENC_SLICED_NOTIFICATIONS)\n");
assert(pD3D12Enc->supports_sliced_fences.bits.supported);
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % pD3D12Enc->m_MaxMetadataBuffersCount].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
return;
}
@ -4304,7 +4309,7 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec,
return;
}
size_t current_metadata_slot = static_cast<size_t>(requested_metadata_fence % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
size_t current_metadata_slot = static_cast<size_t>(requested_metadata_fence % pD3D12Enc->m_MaxMetadataBuffersCount);
opt_metadata.encode_result = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].encode_result;
if (opt_metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) {
debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed on submission with encode_result: %x\n",
@ -4341,18 +4346,18 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec,
debug_printf("d3d12_video_encoder_get_feedback with feedback: %" PRIu64 ", resources slot %" PRIu64 " metadata resolved ID3D12Resource buffer %p metadata required size %" PRIu64 "\n",
requested_metadata_fence,
(requested_metadata_fence % D3D12_VIDEO_ENC_ASYNC_DEPTH),
(requested_metadata_fence % pD3D12Enc->m_MaxQueueAsyncDepth),
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].spBuffer.Get(),
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bufferSize);
if((pD3D12Enc->m_fenceValue - requested_metadata_fence) > D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT)
if((pD3D12Enc->m_fenceValue - requested_metadata_fence) > pD3D12Enc->m_MaxMetadataBuffersCount)
{
debug_printf("[d3d12_video_encoder_get_feedback] Requested metadata for fence %" PRIu64 " at current fence %" PRIu64
" is too far back in time for the ring buffer of size %" PRIu64 " we keep track off - "
" Please increase the D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT environment variable and try again.\n",
" Please increase the m_MaxMetadataBuffersCount of the encoder and try again.\n",
requested_metadata_fence,
pD3D12Enc->m_fenceValue,
static_cast<uint64_t>(D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT));
static_cast<uint64_t>(pD3D12Enc->m_MaxMetadataBuffersCount));
opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
if(pMetadata)
@ -4938,13 +4943,15 @@ d3d12_video_encoder_fence_wait(struct pipe_video_codec *codec,
struct pipe_fence_handle *_fence,
uint64_t timeout)
{
struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec;
assert(pD3D12Enc);
struct d3d12_fence *fence = (struct d3d12_fence *) _fence;
assert(fence);
bool wait_res = d3d12_fence_finish(fence, timeout);
if (wait_res) {
// Opportunistically reset batches
for (uint32_t i = 0; i < D3D12_VIDEO_ENC_ASYNC_DEPTH; ++i)
for (uint32_t i = 0; i < pD3D12Enc->m_MaxQueueAsyncDepth; ++i)
d3d12_video_encoder_sync_completion(codec, i, 0);
}

View file

@ -536,7 +536,7 @@ struct EncodedBitstreamResolvedMetadata
*/
std::vector<uint8_t> m_StagingBitstreamConstruction;
/* Stores encode result for get_feedback readback in the D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT slots */
/* Stores encode result for get_feedback readback in the m_MaxMetadataBuffersCount slots */
enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
/* Expected max frame, slice sizes */
@ -575,6 +575,8 @@ struct d3d12_video_encoder
const uint m_NodeMask = 0u;
const uint m_NodeIndex = 0u;
UINT m_MaxOutputBitstreamSize = 0;
size_t m_MaxQueueAsyncDepth = 0;
size_t m_MaxMetadataBuffersCount = 0;
ComPtr<ID3D12Fence> m_spFence;
uint64_t m_fenceValue = 1u;
@ -616,7 +618,7 @@ struct d3d12_video_encoder
uint64_t m_InputSurfaceFenceValue = 0;
d3d12_unique_fence m_CompletionFence;
/* Stores encode result for submission error control in the D3D12_VIDEO_ENC_ASYNC_DEPTH slots */
/* Stores encode result for submission error control in the m_MaxQueueAsyncDepth slots */
enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
ComPtr<ID3D12Resource> m_spDirtyRectsResolvedOpaqueMap; // output of ID3D12VideoEncodeCommandList::ResolveInputParamLayout

View file

@ -1619,7 +1619,7 @@ d3d12_video_encoder_update_current_frame_pic_params_info_av1(struct d3d12_video_
pD3D12Enc->m_upDPBManager->get_current_frame_picture_control_data(picParams);
// Save state snapshot from record time to resolve headers at get_feedback time
size_t current_metadata_slot = static_cast<size_t>(pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
size_t current_metadata_slot = static_cast<size_t>(pD3D12Enc->m_fenceValue % pD3D12Enc->m_MaxMetadataBuffersCount);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_associatedEncodeCapabilities =
pD3D12Enc->m_currentEncodeCapabilities;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_associatedEncodeConfig =

View file

@ -404,7 +404,7 @@ d3d12_video_encoder_update_current_frame_pic_params_info_h264(struct d3d12_video
pD3D12Enc->m_upDPBManager->get_current_frame_picture_control_data(picParams);
// Save state snapshot from record time to resolve headers at get_feedback time
size_t current_metadata_slot = static_cast<size_t>(pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
size_t current_metadata_slot = static_cast<size_t>(pD3D12Enc->m_fenceValue % pD3D12Enc->m_MaxMetadataBuffersCount);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_associatedEncodeCapabilities =
pD3D12Enc->m_currentEncodeCapabilities;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_associatedEncodeConfig =

View file

@ -633,7 +633,7 @@ d3d12_video_encoder_update_current_frame_pic_params_info_hevc(struct d3d12_video
#endif // D3D12_VIDEO_USE_NEW_ENCODECMDLIST4_INTERFACE
// Save state snapshot from record time to resolve headers at get_feedback time
size_t current_metadata_slot = static_cast<size_t>(pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
size_t current_metadata_slot = static_cast<size_t>(pD3D12Enc->m_fenceValue % pD3D12Enc->m_MaxMetadataBuffersCount);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_associatedEncodeCapabilities =
pD3D12Enc->m_currentEncodeCapabilities;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_associatedEncodeConfig =

View file

@ -66,13 +66,6 @@ GetDesc(ID3D12VideoDecoderHeap *heap)
*/
const bool D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE = debug_get_bool_option("D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE", false);
/**
* This indicates how many in-flight encode commands can happen before blocking on the next request
*/
const size_t D3D12_VIDEO_ENC_ASYNC_DEPTH = static_cast<size_t>(debug_get_num_option("D3D12_VIDEO_ENC_ASYNC_DEPTH", 8));
const size_t D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT = static_cast<size_t>(debug_get_num_option("D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT", 2 * D3D12_VIDEO_ENC_ASYNC_DEPTH));
constexpr unsigned int D3D12_VIDEO_H264_MB_IN_PIXELS = 16;
/* If enabled, the D3D12 AV1 encoder will use always ...CONFIGURABLE_GRID_PARTITION mode */