From 712e49f13729c5b7ff7f25b1fad0eea1c00e47be Mon Sep 17 00:00:00 2001 From: David Rosca Date: Mon, 16 Sep 2024 11:22:11 +0200 Subject: [PATCH] radeonsi/vcn: Don't reuse context with multiple VCN instances The kernel does VCN instance scheduling per context, so when we have multiple instances we should use a new context to be able to utilize all of them. Another issue is with AV1: VCN 3 and VCN 4 only support AV1 on the first instance. The kernel parses IBs and switches to the first instance when it detects AV1, but this only works for the first submitted IB in a context. The CS would be rejected if we first decode/encode other codecs, the kernel schedules them on the second instance (the default), and then we try to decode/encode AV1. Cc: mesa-stable Reviewed-by: Leo Liu Part-of: --- src/gallium/drivers/radeonsi/radeon_vcn_dec.c | 16 ++++++++-- src/gallium/drivers/radeonsi/radeon_vcn_dec.h | 2 ++ src/gallium/drivers/radeonsi/si_uvd.c | 30 +++++++++++++++---- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_dec.c b/src/gallium/drivers/radeonsi/radeon_vcn_dec.c index 92b256d3b51..d8a04436afa 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_dec.c @@ -2414,6 +2414,8 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder) dec->ws->fence_reference(dec->ws, &dec->prev_fence, NULL); dec->ws->cs_destroy(&dec->cs); + if (dec->ectx) + dec->ectx->destroy(dec->ectx); if (dec->stream_type == RDECODE_CODEC_JPEG) { for (i = 0; i < dec->njctx; i++) { @@ -2762,8 +2764,14 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, if (!dec) return NULL; + if (sctx->vcn_has_ctx) { + dec->ectx = pipe_create_multimedia_context(context->screen); + if (!dec->ectx) + sctx->vcn_has_ctx = false; + } + dec->base = *templ; - dec->base.context = context; + dec->base.context = (sctx->vcn_has_ctx) ? 
dec->ectx : context; dec->base.width = width; dec->base.height = height; dec->max_width = width; @@ -2792,7 +2800,9 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, dec->sq.ib_total_size_in_dw = NULL; dec->sq.ib_checksum = NULL; - if (!ws->cs_create(&dec->cs, sctx->ctx, ring, NULL, NULL)) { + if (!ws->cs_create(&dec->cs, + (sctx->vcn_has_ctx) ? ((struct si_context *)dec->ectx)->ctx : sctx->ctx, + ring, NULL, NULL)) { RVID_ERR("Can't get command submission context.\n"); goto error; } @@ -3056,6 +3066,8 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, error: dec->ws->cs_destroy(&dec->cs); + if (dec->ectx) + dec->ectx->destroy(dec->ectx); if (dec->stream_type == RDECODE_CODEC_JPEG) { for (i = 0; i < dec->njctx; i++) { diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_dec.h b/src/gallium/drivers/radeonsi/radeon_vcn_dec.h index f14df3d8189..769039ed952 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_dec.h +++ b/src/gallium/drivers/radeonsi/radeon_vcn_dec.h @@ -140,6 +140,8 @@ struct radeon_decoder { struct pipe_fence_handle *destroy_fence; bool dpb_use_surf; uint64_t dpb_modifier; + + struct pipe_context *ectx; }; void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target, diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c index 3f01b6f5591..52491151d4b 100644 --- a/src/gallium/drivers/radeonsi/si_uvd.c +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -97,6 +97,20 @@ static void si_vce_get_buffer(struct pipe_resource *resource, struct pb_buffer_l *surface = &res->surface; } +static bool si_vcn_need_context(struct si_context *ctx) +{ + /* Kernel does VCN instance scheduling per context, so when we have + * multiple instances we should use new context to be able to utilize + * all of them. + * Another issue is with AV1, VCN 3 and VCN 4 only support AV1 on + * first instance. 
Kernel parses IBs and switches to first instance when + * it detects AV1, but this only works for first submitted IB in context. + * The CS would be rejected if we first decode/encode other codecs, kernel + * schedules on second instance (default) and then we try to decode/encode AV1. + */ + return ctx->screen->info.ip[AMD_IP_VCN_ENC].num_instances > 1; +} + /** * creates an UVD compatible decoder */ @@ -105,10 +119,13 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context, { struct si_context *ctx = (struct si_context *)context; bool vcn = ctx->vcn_ip_ver >= VCN_1_0_0; + struct pipe_video_codec *codec = NULL; if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { if (vcn) { - return radeon_create_encoder(context, templ, ctx->ws, si_vce_get_buffer); + codec = radeon_create_encoder(context, templ, ctx->ws, si_vce_get_buffer); + ctx->vcn_has_ctx = si_vcn_need_context(ctx); + return codec; } else { if (u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_HEVC) return radeon_uvd_create_encoder(context, templ, ctx->ws, si_vce_get_buffer); @@ -119,9 +136,10 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context, templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_PROCESSING) return si_vpe_create_processor(context, templ); - if (ctx->vcn_ip_ver == VCN_4_0_0) - ctx->vcn_has_ctx = true; - - return (vcn) ? radeon_create_decoder(context, templ) - : si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb); + if (vcn) { + codec = radeon_create_decoder(context, templ); + ctx->vcn_has_ctx = si_vcn_need_context(ctx); + return codec; + } + return si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb); }