From 93d434362b22b5e6b93dbb3b2d70809fd080796b Mon Sep 17 00:00:00 2001 From: David Rosca Date: Sat, 2 Nov 2024 10:28:06 +0100 Subject: [PATCH] frontends/va: Move encode fence to coded buffer Instead of using the surface fence, store the fence in buffer. This way the fence won't be overwritten when encoding multiple frames using the same source surface and SyncBuffer will sync the correct job. Also fixes possible crash when destroying coded buffer before calling SyncSurface and possible leak when destroying or reusing coded buffer with pending encode job without calling SyncSurface/SyncBuffer. Reviewed-by: Ruijing Dong Part-of: --- src/gallium/frontends/va/buffer.c | 51 ++++++++++++++++----------- src/gallium/frontends/va/context.c | 13 +++++++ src/gallium/frontends/va/picture.c | 36 ++++++++++++++++--- src/gallium/frontends/va/surface.c | 51 +++++++++++++-------------- src/gallium/frontends/va/va_private.h | 10 +++--- 5 files changed, 105 insertions(+), 56 deletions(-) diff --git a/src/gallium/frontends/va/buffer.c b/src/gallium/frontends/va/buffer.c index 2e15295c8a0..a962f095821 100644 --- a/src/gallium/frontends/va/buffer.c +++ b/src/gallium/frontends/va/buffer.c @@ -31,6 +31,7 @@ #include "util/u_memory.h" #include "util/u_handle_table.h" #include "util/u_transfer.h" +#include "util/set.h" #include "vl/vl_winsys.h" #include "va_private.h" @@ -197,6 +198,8 @@ VAStatus vlVaMapBuffer2(VADriverContextP ctx, VABufferID buf_id, if (buf->type == VAEncCodedBufferType) { VACodedBufferSegment* curr_buf_ptr = (VACodedBufferSegment*) buf->data; + vlVaGetBufferFeedback(buf); + if ((buf->extended_metadata.present_metadata & PIPE_VIDEO_FEEDBACK_METADATA_TYPE_ENCODE_RESULT) && (buf->extended_metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED)) { curr_buf_ptr->status = VA_CODED_BUF_STATUS_BAD_BITSTREAM; @@ -335,6 +338,17 @@ vlVaDestroyBuffer(VADriverContextP ctx, VABufferID buf_id) FREE(buf->data); } + if (buf->ctx) { + assert(_mesa_set_search(buf->ctx->buffers, buf)); + _mesa_set_remove_key(buf->ctx->buffers, buf); + vlVaGetBufferFeedback(buf); + if (buf->fence && buf->ctx->decoder && buf->ctx->decoder->destroy_fence) + buf->ctx->decoder->destroy_fence(buf->ctx->decoder, buf->fence); + } + + if (buf->coded_surf) + buf->coded_surf->coded_buf = NULL; + FREE(buf); handle_table_remove(VL_VA_DRIVER(ctx)->htab, buf_id); mtx_unlock(&drv->mutex); @@ -551,38 +565,35 @@ vlVaSyncBuffer(VADriverContextP ctx, VABufferID buf_id, uint64_t timeout_ns) return VA_STATUS_ERROR_INVALID_BUFFER; } - if (!buf->feedback) { + if (!buf->fence) { /* No outstanding operation: nothing to do. */ mtx_unlock(&drv->mutex); return VA_STATUS_SUCCESS; } - context = handle_table_get(drv->htab, buf->ctx); + context = buf->ctx; if (!context) { mtx_unlock(&drv->mutex); return VA_STATUS_ERROR_INVALID_CONTEXT; } - vlVaSurface* surf = handle_table_get(drv->htab, buf->associated_encode_input_surf); - - if ((buf->feedback) && (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)) { - if (surf && context->decoder->fence_wait && - !context->decoder->fence_wait(context->decoder, surf->fence, timeout_ns)) { - mtx_unlock(&drv->mutex); - return VA_STATUS_ERROR_TIMEDOUT; - } - context->decoder->get_feedback(context->decoder, buf->feedback, &(buf->coded_size), &(buf->extended_metadata)); - buf->feedback = NULL; - /* Also mark the associated render target (encode source texture) surface as done - in case they call vaSyncSurface on it to avoid getting the feedback twice*/ - if(surf) - { - surf->feedback = NULL; - buf->associated_encode_input_surf = VA_INVALID_ID; - } + if (!context->decoder) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; } + int ret = context->decoder->fence_wait(context->decoder, buf->fence, timeout_ns); mtx_unlock(&drv->mutex); - return VA_STATUS_SUCCESS; + return ret ? VA_STATUS_SUCCESS : VA_STATUS_ERROR_TIMEDOUT; } #endif + +void vlVaGetBufferFeedback(vlVaBuffer *buf) +{ + if (!buf->ctx || !buf->ctx->decoder || !buf->feedback) + return; + + buf->ctx->decoder->get_feedback(buf->ctx->decoder, buf->feedback, + &buf->coded_size, &buf->extended_metadata); + buf->feedback = NULL; +} diff --git a/src/gallium/frontends/va/context.c b/src/gallium/frontends/va/context.c index 6e52986711b..8d8384bcd37 100644 --- a/src/gallium/frontends/va/context.c +++ b/src/gallium/frontends/va/context.c @@ -451,6 +451,7 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width, } context->surfaces = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + context->buffers = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); mtx_lock(&drv->mutex); *context_id = handle_table_add(drv->htab, context); @@ -490,6 +491,18 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id) } _mesa_set_destroy(context->surfaces, NULL); + set_foreach(context->buffers, entry) { + vlVaBuffer *buf = (vlVaBuffer *)entry->key; + assert(buf->ctx == context); + vlVaGetBufferFeedback(buf); + buf->ctx = NULL; + if (buf->fence && context->decoder && context->decoder->destroy_fence) { + context->decoder->destroy_fence(context->decoder, buf->fence); + buf->fence = NULL; + } + } + _mesa_set_destroy(context->buffers, NULL); + if (context->decoder) { if (context->desc.base.entry_point == PIPE_VIDEO_ENTRYPOINT_ENCODE) { if (u_reduce_video_profile(context->decoder->profile) == diff --git a/src/gallium/frontends/va/picture.c b/src/gallium/frontends/va/picture.c index 40be483d632..ec964356578 100644 --- a/src/gallium/frontends/va/picture.c +++ b/src/gallium/frontends/va/picture.c @@ -70,6 +70,21 @@ vlVaSetSurfaceContext(vlVaDriver *drv, vlVaSurface *surf, vlVaContext *context) _mesa_set_add(surf->ctx->surfaces, surf); } +static void +vlVaSetBufferContext(vlVaDriver *drv, vlVaBuffer *buf, vlVaContext *context) +{ + if (buf->ctx == context) + return; + + if (buf->ctx) { + assert(_mesa_set_search(buf->ctx->buffers, buf)); + _mesa_set_remove_key(buf->ctx->buffers, buf); + } + + buf->ctx = context; + _mesa_set_add(buf->ctx->buffers, buf); +} + VAStatus vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID render_target) { @@ -103,8 +118,16 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende return VA_STATUS_ERROR_INVALID_SURFACE; } + if (surf->coded_buf) { + surf->coded_buf->coded_surf = NULL; + surf->coded_buf = NULL; + } + + /* Encode only reads from the surface and doesn't set surface fence. */ + if (context->templat.entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE) + vlVaSetSurfaceContext(drv, surf, context); + context->target_id = render_target; - vlVaSetSurfaceContext(drv, surf, context); context->target = surf->buffer; context->mjpeg.sampling_factor = 0; @@ -1270,9 +1293,9 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id) } if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { - context->desc.base.fence = &surf->fence; struct pipe_screen *screen = context->decoder->context->screen; coded_buf = context->coded_buf; + context->desc.base.fence = &coded_buf->fence; if (u_reduce_video_profile(context->templat.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) context->desc.h264enc.frame_num_cnt++; @@ -1299,6 +1322,11 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id) return VA_STATUS_ERROR_INVALID_SURFACE; } + if (coded_buf->coded_surf) + coded_buf->coded_surf->coded_buf = NULL; + vlVaGetBufferFeedback(coded_buf); + vlVaSetBufferContext(drv, coded_buf, context); + int driver_metadata_support = drv->pipe->screen->get_video_param(drv->pipe->screen, context->decoder->profile, context->decoder->entrypoint, @@ -1314,10 +1342,8 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id) context->decoder->encode_bitstream(context->decoder, context->target, coded_buf->derived_surface.resource, &feedback); coded_buf->feedback = feedback; - coded_buf->ctx = context_id; - surf->feedback = feedback; + coded_buf->coded_surf = surf; surf->coded_buf = coded_buf; - coded_buf->associated_encode_input_surf = context->target_id; } else if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { context->desc.base.fence = &surf->fence; } else if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_PROCESSING) { diff --git a/src/gallium/frontends/va/surface.c b/src/gallium/frontends/va/surface.c index 423c60a9a02..fba841035ac 100644 --- a/src/gallium/frontends/va/surface.c +++ b/src/gallium/frontends/va/surface.c @@ -138,6 +138,8 @@ vlVaDestroySurfaces(VADriverContextP ctx, VASurfaceID *surface_list, int num_sur drv->efc_count = -1; } } + if (surf->coded_buf) + surf->coded_buf->coded_surf = NULL; util_dynarray_fini(&surf->subpics); FREE(surf); handle_table_remove(drv->htab, surface_list[i]); @@ -153,6 +155,7 @@ _vlVaSyncSurface(VADriverContextP ctx, VASurfaceID render_target, uint64_t timeo vlVaDriver *drv; vlVaContext *context; vlVaSurface *surf; + struct pipe_fence_handle *fence; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; @@ -168,19 +171,26 @@ _vlVaSyncSurface(VADriverContextP ctx, VASurfaceID render_target, uint64_t timeo return VA_STATUS_ERROR_INVALID_SURFACE; } + if (surf->coded_buf) { + context = surf->coded_buf->ctx; + fence = surf->coded_buf->fence; + } else { + context = surf->ctx; + fence = surf->fence; + } + /* This is checked before getting the context below as * surf->ctx is only set in begin_frame * and not when the surface is created * Some apps try to sync/map the surface right after creation and * would get VA_STATUS_ERROR_INVALID_CONTEXT */ - if (!surf->buffer || (!surf->feedback && !surf->fence)) { + if (!surf->buffer || !fence) { // No outstanding encode/decode operation: nothing to do. mtx_unlock(&drv->mutex); return VA_STATUS_SUCCESS; } - context = surf->ctx; if (!context) { mtx_unlock(&drv->mutex); return VA_STATUS_ERROR_INVALID_CONTEXT; @@ -191,19 +201,7 @@ _vlVaSyncSurface(VADriverContextP ctx, VASurfaceID render_target, uint64_t timeo return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; } - /* If driver does not implement fence_wait assume no - * async work needed to be waited on and return success - */ - int ret = (context->decoder->fence_wait) ? 0 : 1; - if (context->decoder->fence_wait) - ret = context->decoder->fence_wait(context->decoder, surf->fence, timeout_ns); - - if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE && surf->feedback && ret) { - context->decoder->get_feedback(context->decoder, surf->feedback, &(surf->coded_buf->coded_size), &(surf->coded_buf->extended_metadata)); - surf->feedback = NULL; - surf->coded_buf->feedback = NULL; - surf->coded_buf->associated_encode_input_surf = VA_INVALID_ID; - } + int ret = context->decoder->fence_wait(context->decoder, fence, timeout_ns); mtx_unlock(&drv->mutex); return ret ? VA_STATUS_SUCCESS : VA_STATUS_ERROR_TIMEDOUT; } @@ -228,6 +226,7 @@ vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfac vlVaDriver *drv; vlVaSurface *surf; vlVaContext *context; + struct pipe_fence_handle *fence; if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; @@ -244,20 +243,27 @@ vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfac return VA_STATUS_ERROR_INVALID_SURFACE; } + if (surf->coded_buf) { + context = surf->coded_buf->ctx; + fence = surf->coded_buf->fence; + } else { + context = surf->ctx; + fence = surf->fence; + } + /* This is checked before getting the context below as * surf->ctx is only set in begin_frame * and not when the surface is created * Some apps try to sync/map the surface right after creation and * would get VA_STATUS_ERROR_INVALID_CONTEXT */ - if (!surf->buffer || (!surf->feedback && !surf->fence)) { + if (!surf->buffer || !fence) { // No outstanding encode/decode operation: nothing to do. *status = VASurfaceReady; mtx_unlock(&drv->mutex); return VA_STATUS_SUCCESS; } - context = surf->ctx; if (!context) { mtx_unlock(&drv->mutex); return VA_STATUS_ERROR_INVALID_CONTEXT; @@ -268,16 +274,7 @@ vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfac return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; } - /* If driver does not implement fence_wait assume no - * async work needed to be waited on and return surface ready - */ - int ret = (context->decoder->fence_wait) ? 0 : 1; - - if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE && !surf->feedback) - ret = 1; - else if (context->decoder->fence_wait) - ret = context->decoder->fence_wait(context->decoder, surf->fence, 0); - + int ret = context->decoder->fence_wait(context->decoder, fence, 0); mtx_unlock(&drv->mutex); if (ret) diff --git a/src/gallium/frontends/va/va_private.h b/src/gallium/frontends/va/va_private.h index 5d90fff2c7e..2f2998af9d2 100644 --- a/src/gallium/frontends/va/va_private.h +++ b/src/gallium/frontends/va/va_private.h @@ -364,11 +364,12 @@ typedef struct { struct pipe_enc_feedback_metadata extended_metadata; struct pipe_video_buffer *derived_image_buffer; void *feedback; - VASurfaceID associated_encode_input_surf; - VAContextID ctx; + struct vlVaContext *ctx; + struct vlVaSurface *coded_surf; + struct pipe_fence_handle *fence; } vlVaBuffer; -typedef struct { +typedef struct vlVaContext { struct pipe_video_codec templat, *decoder; struct pipe_video_buffer *target; union { @@ -416,6 +417,7 @@ typedef struct { int packed_header_type; bool packed_header_emulation_bytes; struct set *surfaces; + struct set *buffers; unsigned slice_data_offset; bool have_slice_params; @@ -439,7 +441,6 @@ typedef struct vlVaSurface { struct util_dynarray subpics; /* vlVaSubpicture */ vlVaContext *ctx; vlVaBuffer *coded_buf; - void *feedback; bool full_range; struct pipe_fence_handle *fence; struct vlVaSurface *efc_surface; /* input surface for EFC */ @@ -564,6 +565,7 @@ VAStatus vlVaHandleSurfaceAllocate(vlVaDriver *drv, vlVaSurface *surface, struct struct pipe_video_buffer *vlVaGetSurfaceBuffer(vlVaDriver *drv, vlVaSurface *surface); void vlVaAddRawHeader(struct util_dynarray *headers, uint8_t type, uint32_t size, uint8_t *buf, bool is_slice, uint32_t emulation_bytes_start); +void vlVaGetBufferFeedback(vlVaBuffer *buf); void vlVaSetSurfaceContext(vlVaDriver *drv, vlVaSurface *surf, vlVaContext *context); void vlVaGetReferenceFrame(vlVaDriver *drv, VASurfaceID surface_id, struct pipe_video_buffer **ref_frame); void vlVaHandlePictureParameterBufferMPEG12(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf);