radeon/vcn: engage all available jpeg engines

Use multiple contexts and submit in a round-robin scheme to make
use of all the available JPEG engines simultaneously. During MJPEG
decode, the context need not be the same across frames, as they are
discrete JPEG images.

V2: number of ctx to be equal to number of engines and fix indent (Leo)
V3: decide ctx count in create_decoder, don't add a video param (Boyuan)

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16355>
This commit is contained in:
Sathishkumar S 2022-04-27 18:42:57 +05:30 committed by Marge Bot
parent 3ec64a5bab
commit 324898f5c6
3 changed files with 77 additions and 6 deletions

View file

@ -2285,6 +2285,13 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
dec->ws->cs_destroy(&dec->cs);
if (dec->stream_type == RDECODE_CODEC_JPEG) {
for (i = 0; i < dec->njctx; i++) {
dec->ws->cs_destroy(&dec->jcs[i]);
dec->ws->ctx_destroy(dec->jctx[i]);
}
}
for (i = 0; i < NUM_BUFFERS; ++i) {
si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
@ -2302,6 +2309,8 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
si_vid_destroy_buffer(&dec->ctx);
si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec->jcs);
FREE(dec->jctx);
FREE(dec);
}
@ -2442,6 +2451,25 @@ static void radeon_dec_end_frame(struct pipe_video_codec *decoder, struct pipe_v
next_buffer(dec);
}
/**
 * Finish the JPEG frame currently being decoded.
 *
 * Does nothing when no bitstream pointer is set. Otherwise the decode
 * commands are sent, the command stream selected by cb_idx is flushed
 * asynchronously, the decoder moves on to its next buffer, and cb_idx is
 * advanced (wrapping at njctx) so the following frame uses the next stream.
 */
static void radeon_dec_jpeg_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
                                      struct pipe_picture_desc *picture)
{
   struct radeon_decoder *jdec = (struct radeon_decoder *)decoder;

   assert(decoder);

   if (jdec->bs_ptr) {
      jdec->send_cmd(jdec, target, picture);

      /* Flush only the stream that received this frame's commands. */
      jdec->ws->cs_flush(jdec->jcs + jdec->cb_idx, PIPE_FLUSH_ASYNC, NULL);
      next_buffer(jdec);

      /* Round-robin: step to the following command stream. */
      const unsigned following = jdec->cb_idx + 1;
      jdec->cb_idx = following % jdec->njctx;
   }
}
/**
* flush any outstanding command buffers to the hardware
*/
@ -2526,6 +2554,32 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
goto error;
}
/*
 * JPEG only: allocate njctx winsys contexts, each with its own command
 * stream, and route end_frame to the JPEG variant that cycles through them.
 */
if (dec->stream_type == RDECODE_CODEC_JPEG) {
   /* Two contexts on Arcturus/Aldebaran, a single one on every other chip. */
   if (sctx->family == CHIP_ARCTURUS || sctx->family == CHIP_ALDEBARAN)
      dec->njctx = 2;
   else
      dec->njctx = 1;

   dec->jctx = (struct radeon_winsys_ctx **) CALLOC(dec->njctx,
                                                    sizeof(struct radeon_winsys_ctx *));
   dec->jcs = (struct radeon_cmdbuf *) CALLOC(dec->njctx, sizeof(struct radeon_cmdbuf));
   if (!dec->jctx || !dec->jcs)
      goto err;

   for (i = 0; i < dec->njctx; i++) {
      /* Initialize the context handle and the command stream. */
      dec->jctx[i] = dec->ws->ctx_create(dec->ws);
      if (!dec->jctx[i]) {
         /*
          * Check the handle that was just created (not sctx->ctx), and trim
          * njctx so the error path only tears down entries that exist;
          * ctx_destroy is not shown to accept NULL.
          */
         dec->njctx = i;
         goto error;
      }

      if (!dec->ws->cs_create(&dec->jcs[i], dec->jctx[i], ring, NULL, NULL, false)) {
         RVID_ERR("Can't get additional command submission context for mJPEG.\n");
         /* Entry i has a context; later entries were never initialized. */
         dec->njctx = i + 1;
         goto error;
      }
   }

   dec->base.end_frame = radeon_dec_jpeg_end_frame;
   dec->cb_idx = 0;
}
for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++)
dec->render_pic_list[i] = NULL;
@ -2668,6 +2722,13 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
error:
dec->ws->cs_destroy(&dec->cs);
if (dec->stream_type == RDECODE_CODEC_JPEG) {
for (i = 0; i < dec->njctx; i++) {
dec->ws->cs_destroy(&dec->jcs[i]);
dec->ws->ctx_destroy(dec->jctx[i]);
}
}
for (i = 0; i < NUM_BUFFERS; ++i) {
si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
@ -2678,6 +2739,11 @@ error:
si_vid_destroy_buffer(&dec->ctx);
si_vid_destroy_buffer(&dec->sessionctx);
err:
if (dec->jcs)
FREE(dec->jcs);
if (dec->jctx)
FREE(dec->jctx);
FREE(dec);
return NULL;

View file

@ -104,6 +104,11 @@ struct radeon_decoder {
void (*send_cmd)(struct radeon_decoder *dec, struct pipe_video_buffer *target,
struct pipe_picture_desc *picture);
/* Additional contexts for mJPEG */
struct radeon_cmdbuf *jcs;
struct radeon_winsys_ctx **jctx;
unsigned cb_idx;
unsigned njctx;
};
void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target,

View file

@ -57,8 +57,8 @@ static struct pb_buffer *radeon_jpeg_get_decode_param(struct radeon_decoder *dec
/*
 * Emit one RDECODE_PKTJ(reg, cond, type) entry followed by the 32-bit val
 * into a decoder command stream.
 *
 * NOTE(review): this listing is a diff hunk (8 lines before, 8 after) with
 * its +/- markers lost, so the two writes to dec->cs appear to be the removed
 * lines and the two writes to dec->jcs[dec->cb_idx] their replacements.
 * Confirm against the real file before relying on either pair.
 */
static void set_reg_jpeg(struct radeon_decoder *dec, unsigned reg, unsigned cond, unsigned type,
uint32_t val)
{
radeon_emit(&dec->cs, RDECODE_PKTJ(reg, cond, type));
radeon_emit(&dec->cs, val);
radeon_emit(&dec->jcs[dec->cb_idx], RDECODE_PKTJ(reg, cond, type));
radeon_emit(&dec->jcs[dec->cb_idx], val);
}
/* send a bitstream buffer command */
@ -85,7 +85,7 @@ static void send_cmd_bitstream(struct radeon_decoder *dec, struct pb_buffer *buf
set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_CTX_DATA), COND0, TYPE0, (0 << 9));
set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_SOFT_RESET), COND0, TYPE3, (1 << 9));
dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
dec->ws->cs_add_buffer(&dec->jcs[dec->cb_idx], buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;
@ -117,7 +117,7 @@ static void send_cmd_target(struct radeon_decoder *dec, struct pb_buffer *buf, u
set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_TILING_CTRL), COND0, TYPE0, 0);
set_reg_jpeg(dec, SOC15_REG_ADDR(mmUVD_JPEG_UV_TILING_CTRL), COND0, TYPE0, 0);
dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
dec->ws->cs_add_buffer(&dec->jcs[dec->cb_idx], buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;
@ -205,7 +205,7 @@ static void send_cmd_bitstream_direct(struct radeon_decoder *dec, struct pb_buff
set_reg_jpeg(dec, vcnipUVD_JRBC_IB_REF_DATA, COND0, TYPE0, (0 << 0x10));
set_reg_jpeg(dec, vcnipUVD_JPEG_DEC_SOFT_RST, COND3, TYPE3, (0x1 << 0x10));
dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
dec->ws->cs_add_buffer(&dec->jcs[dec->cb_idx], buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;
@ -236,7 +236,7 @@ static void send_cmd_target_direct(struct radeon_decoder *dec, struct pb_buffer
set_reg_jpeg(dec, vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE, COND0, TYPE0, 0);
set_reg_jpeg(dec, vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE, COND0, TYPE0, 0);
dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
dec->ws->cs_add_buffer(&dec->jcs[dec->cb_idx], buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
addr = dec->ws->buffer_get_virtual_address(buf);
addr = addr + off;