From 00c1f3aef2d5dce6c1ca5050c87264d09e0bc89a Mon Sep 17 00:00:00 2001 From: David Rosca Date: Fri, 7 Nov 2025 11:25:19 +0100 Subject: [PATCH] radeonsi/vcn: Reduce allocated size for pre-encode recon pics We use 4x downscale for pre-encode, so we don't need full size pre-encode reconstructed pictures. Cc: mesa-stable Reviewed-by: Benjamin Cheng Part-of: (cherry picked from commit 1f83e731451b84b16cc1fa2f04b46a71a66e43ff) [Eric: zero-initialised the two new vars, as per https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38303#note_3201571] --- .pick_status.json | 2 +- src/gallium/drivers/radeonsi/radeon_vcn_enc.c | 35 +++++++++++++------ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 2a89b3894fd..ddec95e8445 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -5414,7 +5414,7 @@ "description": "radeonsi/vcn: Reduce allocated size for pre-encode recon pics", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c index 7ed0705eb0d..a9b60397640 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c @@ -1280,7 +1280,7 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict uint32_t aligned_width = align(enc->base.width, rec_alignment); uint32_t aligned_height = align(enc->base.height, rec_alignment); uint32_t pitch = align(aligned_width, enc->alignment); - uint32_t luma_size, chroma_size, offset; + uint32_t luma_size, chroma_size, pre_luma_size = 0, pre_chroma_size = 0, offset; struct radeon_enc_pic *enc_pic = &enc->enc_pic; int i; bool has_b = enc_pic->spec_misc.b_picture_enabled; /* for h264 only */ @@ -1290,9 +1290,21 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict luma_size = align(pitch * aligned_dpb_height , enc->alignment); chroma_size = align(luma_size / 2 , enc->alignment); + + if (enc_pic->quality_modes.pre_encode_mode) { + uint32_t scale = enc_pic->quality_modes.pre_encode_mode; + uint32_t pre_pitch = align(aligned_width / scale, enc->alignment); + uint32_t pre_aligned_height = align(aligned_height / scale, enc->alignment); + + pre_luma_size = align(pre_pitch * MAX2(256, pre_aligned_height), enc->alignment); + pre_chroma_size = align(pre_luma_size / 2, enc->alignment); + } + if (enc_pic->bit_depth_luma_minus8 || enc_pic->bit_depth_chroma_minus8) { luma_size *= 2; chroma_size *= 2; + pre_luma_size *= 2; + pre_chroma_size *= 2; } assert(num_reconstructed_pictures <= RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES); @@ -1320,11 +1332,11 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict if (enc_pic->quality_modes.pre_encode_mode) { enc_pic->ctx_buf.pre_encode_input_picture.rgb.red_offset = offset; - offset += luma_size; + offset += pre_luma_size; enc_pic->ctx_buf.pre_encode_input_picture.rgb.green_offset = offset; - offset += luma_size; + offset += pre_luma_size; enc_pic->ctx_buf.pre_encode_input_picture.rgb.blue_offset = offset; - offset += luma_size; + offset += pre_luma_size; } if (is_av1) { @@ -1338,7 +1350,7 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict if (enc_pic->quality_modes.pre_encode_mode) radeon_enc_rec_offset(&enc_pic->ctx_buf.pre_encode_reconstructed_pictures[i], - &offset, luma_size, chroma_size, is_av1); + &offset, pre_luma_size, pre_chroma_size, is_av1); } for (; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) { @@ -1362,11 +1374,11 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict if (enc_pic->quality_modes.pre_encode_mode) { enc_pic->ctx_buf.pre_encode_input_picture.rgb.red_offset = offset; - offset += luma_size; + offset += pre_luma_size; enc_pic->ctx_buf.pre_encode_input_picture.rgb.green_offset = offset; - offset += luma_size; + offset += pre_luma_size; enc_pic->ctx_buf.pre_encode_input_picture.rgb.blue_offset = offset; - offset += luma_size; + offset += pre_luma_size; } for (i = 0; i < num_reconstructed_pictures; i++) { @@ -1375,7 +1387,7 @@ static int setup_dpb(struct radeon_encoder *enc, uint32_t num_reconstructed_pict if (enc_pic->quality_modes.pre_encode_mode) radeon_enc_rec_offset(&enc_pic->ctx_buf.pre_encode_reconstructed_pictures[i], - &offset, luma_size, chroma_size, false); + &offset, pre_luma_size, pre_chroma_size, false); } for (; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) { @@ -1953,7 +1965,10 @@ void radeon_enc_create_dpb_aux_buffers(struct radeon_encoder *enc, struct radeon } if (enc->enc_pic.quality_modes.pre_encode_mode) { - buf->pre = enc->base.context->create_video_buffer(enc->base.context, &buf->templ); + struct pipe_video_buffer templ = buf->templ; + templ.width /= enc->enc_pic.quality_modes.pre_encode_mode; + templ.height /= enc->enc_pic.quality_modes.pre_encode_mode; + buf->pre = enc->base.context->create_video_buffer(enc->base.context, &templ); if (!buf->pre) { RADEON_ENC_ERR("Can't create preenc buffer!\n"); return;