From acde3456064a9e5a492118e21cbddc741199f1e1 Mon Sep 17 00:00:00 2001 From: Roy Chan Date: Mon, 17 Jun 2024 11:43:40 -0400 Subject: [PATCH] amd/vpelib: Optimize the CPU usage by caching all the LUT configs [why] The fixed-point conversion takes quite a bit of CPU time. And if there are no changes, we don't need to convert all sw points into hw points and generate the corresponding configs. [how] Introduce a config cache header so that all config caching can be handled in the same way. A LUT is not cached while it is in bypass mode; it is cached only when it is not bypassed and its dirty flag has been set by the upper layer. The config cache handling will re-apply the cached config if not dirty and not in bypass mode. Reviewed-by: Krunoslav Kovac Acked-by: Alan Liu Signed-off-by: Roy Chan Part-of: --- src/amd/vpelib/inc/vpe_types.h | 12 +- src/amd/vpelib/meson.build | 1 + src/amd/vpelib/src/chip/vpe10/inc/vpe10_mpc.h | 8 +- .../vpelib/src/chip/vpe10/vpe10_cm_common.c | 6 +- src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c | 11 +- src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c | 97 ++++------ .../vpelib/src/chip/vpe10/vpe10_resource.c | 2 +- src/amd/vpelib/src/core/3dlut_builder.c | 11 +- src/amd/vpelib/src/core/color.c | 173 ++++++++++++------ src/amd/vpelib/src/core/inc/3dlut_builder.h | 4 +- src/amd/vpelib/src/core/inc/color.h | 46 +++-- src/amd/vpelib/src/core/inc/config_cache.h | 154 ++++++++++++++++ src/amd/vpelib/src/core/inc/mpc.h | 10 +- src/amd/vpelib/src/core/inc/vpe_priv.h | 25 +-- src/amd/vpelib/src/core/resource.c | 41 ++--- src/amd/vpelib/src/core/vpelib.c | 4 +- 16 files changed, 413 insertions(+), 192 deletions(-) create mode 100644 src/amd/vpelib/src/core/inc/config_cache.h diff --git a/src/amd/vpelib/inc/vpe_types.h b/src/amd/vpelib/inc/vpe_types.h index f719c3b84de..6d7d7f55369 100644 --- a/src/amd/vpelib/inc/vpe_types.h +++ b/src/amd/vpelib/inc/vpe_types.h @@ -239,8 +239,11 @@ struct vpe_cap_funcs { * @param[in/out] output dcc capable result and related settings * 
@return true if supported */ - bool (*get_dcc_compression_output_cap)(const struct vpe *vpe, const struct vpe_dcc_surface_param *params, struct vpe_surface_dcc_cap *cap); - bool (*get_dcc_compression_input_cap)(const struct vpe *vpe, const struct vpe_dcc_surface_param *params, struct vpe_surface_dcc_cap *cap); + bool (*get_dcc_compression_output_cap)(const struct vpe *vpe, + const struct vpe_dcc_surface_param *params, struct vpe_surface_dcc_cap *cap); + + bool (*get_dcc_compression_input_cap)(const struct vpe *vpe, + const struct vpe_dcc_surface_param *params, struct vpe_surface_dcc_cap *cap); }; /**************************************** @@ -351,7 +354,7 @@ struct vpe_debug_options { uint32_t bg_bit_depth : 1; uint32_t visual_confirm : 1; uint32_t skip_optimal_tap_check : 1; - uint32_t disable_3dlut_cache : 1; + uint32_t disable_lut_caching : 1; } flags; // valid only if the corresponding flag is set @@ -374,7 +377,8 @@ struct vpe_debug_options { uint32_t opp_pipe_crc_ctrl : 1; uint32_t mpc_crc_ctrl : 1; uint32_t skip_optimal_tap_check : 1; - uint32_t disable_3dlut_cache : 1; + uint32_t disable_lut_caching : 1; /*< disable config caching for all luts */ + uint32_t bg_bit_depth; struct vpe_mem_low_power_enable_options enable_mem_low_power; diff --git a/src/amd/vpelib/meson.build b/src/amd/vpelib/meson.build index 9d7c3cd4850..06e7a5fb96e 100644 --- a/src/amd/vpelib/meson.build +++ b/src/amd/vpelib/meson.build @@ -17,6 +17,7 @@ c_args_vpe = cc.get_supported_arguments([ '-Werror=enum-conversion', '-Werror=enum-compare', '-Werror=maybe-uninitialized', + '-Werror=parentheses', ]) c_args_vpe += [ diff --git a/src/amd/vpelib/src/chip/vpe10/inc/vpe10_mpc.h b/src/amd/vpelib/src/chip/vpe10/inc/vpe10_mpc.h index 74c8284d799..18f257347c7 100644 --- a/src/amd/vpelib/src/chip/vpe10/inc/vpe10_mpc.h +++ b/src/amd/vpelib/src/chip/vpe10/inc/vpe10_mpc.h @@ -1454,12 +1454,12 @@ void vpe10_mpc_program_mpc_out(struct mpc *mpc, enum vpe_surface_pixel_format fo void 
vpe10_mpc_set_output_transfer_func(struct mpc *mpc, struct output_ctx *output_ctx); void vpe10_mpc_set_mpc_shaper_3dlut( - struct mpc *mpc, const struct transfer_func *func_shaper, const struct vpe_3dlut *lut3d_func); + struct mpc *mpc, struct transfer_func *func_shaper, struct vpe_3dlut *lut3d_func); -void vpe10_mpc_set_blend_lut(struct mpc *mpc, const struct transfer_func *blend_tf); +void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf); -bool vpe10_mpc_program_movable_cm(struct mpc *mpc, const struct transfer_func *func_shaper, - const struct vpe_3dlut *lut3d_func, const struct transfer_func *blend_tf, bool afterblend); +bool vpe10_mpc_program_movable_cm(struct mpc *mpc, struct transfer_func *func_shaper, + struct vpe_3dlut *lut3d_func, struct transfer_func *blend_tf, bool afterblend); void vpe10_mpc_program_crc(struct mpc *mpc, bool enable); #ifdef __cplusplus diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c b/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c index db46383c4c8..349de86da03 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c @@ -232,7 +232,8 @@ bool vpe10_cm_helper_translate_curve_to_hw_format( int32_t i; uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; - if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) + if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS || + !output_tf->dirty) return false; corner_points = lut_params->corner_points; @@ -417,7 +418,8 @@ bool vpe10_cm_helper_translate_curve_to_degamma_hw_format( int32_t i; uint32_t k, seg_distr[MAX_REGIONS_NUMBER_DEGAMMA], num_segments, hw_points; - if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) + if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS || + !output_tf->dirty) return false; corner_points = lut_params->corner_points; diff --git 
a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c index e489f933e1f..053f5ef006d 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c @@ -185,8 +185,11 @@ void vpe10_dpp_program_input_transfer_func(struct dpp *dpp, struct transfer_func PROGRAM_ENTRY(); - // There should always have input_tf + struct stream_ctx *stream_ctx = &vpe_priv->stream_ctx[vpe_priv->fe_cb_ctx.stream_idx]; + bool bypass; + VPE_ASSERT(input_tf); + // There should always have input_tf // Only accept either DISTRIBUTED_POINTS or BYPASS // No support for PREDEFINED case VPE_ASSERT(input_tf->type == TF_TYPE_DISTRIBUTED_POINTS || input_tf->type == TF_TYPE_BYPASS); @@ -197,7 +200,11 @@ void vpe10_dpp_program_input_transfer_func(struct dpp *dpp, struct transfer_func vpe10_cm_helper_translate_curve_to_degamma_hw_format(input_tf, &dpp->degamma_params); params = &dpp->degamma_params; } - vpe10_dpp_program_gamcor_lut(dpp, params); + + bypass = ((input_tf->type == TF_TYPE_BYPASS) || dpp->vpe_priv->init.debug.bypass_gamcor); + + CONFIG_CACHE(input_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, + vpe10_dpp_program_gamcor_lut(dpp, params)); } void vpe10_dpp_program_gamut_remap(struct dpp *dpp, struct colorspace_transform *gamut_remap) diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c b/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c index 94fd3c539ff..31b9d2d19ce 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c @@ -1241,9 +1241,15 @@ void vpe10_mpc_program_mpc_out(struct mpc *mpc, enum vpe_surface_pixel_format fo } void vpe10_mpc_set_mpc_shaper_3dlut( - struct mpc *mpc, const struct transfer_func *func_shaper, const struct vpe_3dlut *lut3d_func) + struct mpc *mpc, struct transfer_func *func_shaper, struct vpe_3dlut *lut3d_func) { - const struct pwl_params *shaper_lut = NULL; + const struct pwl_params *shaper_lut = NULL; + const struct 
tetrahedral_params *lut3d_params; + + PROGRAM_ENTRY(); + struct stream_ctx *stream_ctx = &vpe_priv->stream_ctx[vpe_priv->fe_cb_ctx.stream_idx]; + bool bypass; + // get the shaper lut params if (func_shaper) { if (func_shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { @@ -1255,57 +1261,14 @@ void vpe10_mpc_set_mpc_shaper_3dlut( } } - mpc->funcs->program_shaper(mpc, shaper_lut); + bypass = (!shaper_lut || (func_shaper && func_shaper->type == TF_TYPE_BYPASS)); + CONFIG_CACHE(func_shaper, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, + mpc->funcs->program_shaper(mpc, shaper_lut)); - if (lut3d_func) { - if (lut3d_func->state.bits.initialized) { - // check if 3D Lut cache enabled - PROGRAM_ENTRY(); - struct stream_ctx *stream_ctx = &vpe_priv->stream_ctx[vpe_priv->fe_cb_ctx.stream_idx]; - - if (mpc->vpe_priv->init.debug.disable_3dlut_cache || !stream_ctx->uid_3dlut || - !stream_ctx->lut3d_cache) { - mpc->funcs->program_3dlut(mpc, &lut3d_func->lut_3d); - } else { // 3D Lut cache enabled - - config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); - - // check cache status, if cache exist, use cache - if (stream_ctx->lut3d_cache->uid == stream_ctx->uid_3dlut && - config_writer->buf->size >= stream_ctx->lut3d_cache->buffer_size) { - memcpy((void *)(uintptr_t)config_writer->base_cpu_va, - stream_ctx->lut3d_cache->cache_buf, - stream_ctx->lut3d_cache->buffer_size); - config_writer->buf->cpu_va = config_writer->base_cpu_va + - stream_ctx->lut3d_cache->buffer_size; - config_writer->buf->gpu_va = config_writer->base_gpu_va + - stream_ctx->lut3d_cache->buffer_size; - config_writer->buf->size -= - (stream_ctx->lut3d_cache->buffer_size - sizeof(uint32_t)); - } else { // if cache not exist generate command and save to cache - uint64_t start, end; - - uint16_t config_num = stream_ctx->num_configs; - - start = config_writer->base_cpu_va; - mpc->funcs->program_3dlut(mpc, &lut3d_func->lut_3d); - end = config_writer->buf->cpu_va; - if (config_num == 
stream_ctx->num_configs) { // check if cross config - if ((end - start) <= VPE_3DLUT_CACHE_SIZE) { - stream_ctx->lut3d_cache->buffer_size = end - start; - memcpy(stream_ctx->lut3d_cache->cache_buf, (void *)(uintptr_t)start, - stream_ctx->lut3d_cache->buffer_size); - stream_ctx->lut3d_cache->uid = stream_ctx->uid_3dlut; - } - } else { // current cache does not support cross config - stream_ctx->lut3d_cache->uid = 0; - } - } - } - } else { - mpc->funcs->program_3dlut(mpc, NULL); - } - } + bypass = (!lut3d_func || !lut3d_func->state.bits.initialized); + lut3d_params = (bypass) ? (NULL) : (&lut3d_func->lut_3d); + CONFIG_CACHE(lut3d_func, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, + mpc->funcs->program_3dlut(mpc, lut3d_params)); return; } @@ -1314,9 +1277,11 @@ void vpe10_mpc_set_output_transfer_func(struct mpc *mpc, struct output_ctx *outp { /* program OGAM only for the top pipe*/ struct pwl_params *params = NULL; - bool ret = false; + bool bypass; - if (ret == false && output_ctx->output_tf) { + PROGRAM_ENTRY(); + + if (output_ctx->output_tf) { // No support HWPWL as it is legacy if (output_ctx->output_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { vpe10_cm_helper_translate_curve_to_hw_format( // this is cm3.0 version instead 1.0 @@ -1328,14 +1293,24 @@ void vpe10_mpc_set_output_transfer_func(struct mpc *mpc, struct output_ctx *outp if (output_ctx->output_tf->type == TF_TYPE_PREDEFINED) VPE_ASSERT(0); } - mpc->funcs->set_output_gamma(mpc, params); + + bypass = (!output_ctx->output_tf || (output_ctx->output_tf->type == TF_TYPE_BYPASS) || + vpe_priv->init.debug.cm_in_bypass || vpe_priv->init.debug.bypass_ogam); + + CONFIG_CACHE(output_ctx->output_tf, output_ctx, vpe_priv->init.debug.disable_lut_caching, + bypass, mpc->funcs->set_output_gamma(mpc, params)); } -void vpe10_mpc_set_blend_lut(struct mpc *mpc, const struct transfer_func *blend_tf) +void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf) { struct pwl_params *blend_lut = 
NULL; enum cm_type gamma_type = CM_DEGAM; + PROGRAM_ENTRY(); + + struct stream_ctx *stream_ctx = &vpe_priv->stream_ctx[vpe_priv->fe_cb_ctx.stream_idx]; + bool bypass; + if (blend_tf && blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { gamma_type = blend_tf->cm_gamma_type; @@ -1349,11 +1324,15 @@ void vpe10_mpc_set_blend_lut(struct mpc *mpc, const struct transfer_func *blend_ blend_lut = &mpc->blender_params; } - mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type); + bypass = + ((!blend_tf) || (blend_tf->type == TF_TYPE_BYPASS) || vpe_priv->init.debug.bypass_blndgam); + + CONFIG_CACHE(blend_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, + mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type)); } -bool vpe10_mpc_program_movable_cm(struct mpc *mpc, const struct transfer_func *func_shaper, - const struct vpe_3dlut *lut3d_func, const struct transfer_func *blend_tf, bool afterblend) +bool vpe10_mpc_program_movable_cm(struct mpc *mpc, struct transfer_func *func_shaper, + struct vpe_3dlut *lut3d_func, struct transfer_func *blend_tf, bool afterblend) { struct pwl_params *params = NULL; bool ret = false; diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c b/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c index 95c3dac8904..c447d11308b 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c @@ -1036,7 +1036,7 @@ void vpe10_create_stream_ops_config(struct vpe_priv *vpe_priv, uint32_t pipe_idx config_writer_complete(&vpe_priv->config_writer); } -#define VPE10_GENERAL_VPE_DESC_SIZE 64 // 4 * (4 + (2 * num_configs)) +#define VPE10_GENERAL_VPE_DESC_SIZE 144 // 4 * (4 + (2 * MAX_NUM_SAVED_CONFIG)) #define VPE10_GENERAL_EMB_USAGE_FRAME_SHARED 6000 // currently max 4804 is recorded #define VPE10_GENERAL_EMB_USAGE_3DLUT_FRAME_SHARED 40960 // currently max 35192 is recorded #define VPE10_GENERAL_EMB_USAGE_BG_SHARED 2400 // currently max 1772 + 92 + 72 = 1936 is recorded diff --git 
a/src/amd/vpelib/src/core/3dlut_builder.c b/src/amd/vpelib/src/core/3dlut_builder.c index dc53c98afeb..6aa0301fc16 100644 --- a/src/amd/vpelib/src/core/3dlut_builder.c +++ b/src/amd/vpelib/src/core/3dlut_builder.c @@ -74,15 +74,9 @@ static void convert_3dlut_to_tetrahedral_params( params->use_tetrahedral_9 = !is_17x17x17; } -bool vpe_convert_to_tetrahedral(struct vpe_priv *vpe_priv, uint16_t rgb_lib[17 * 17 * 17 * 3], - struct vpe_3dlut *params, bool enable_3dlut) +bool vpe_convert_to_tetrahedral( + struct vpe_priv *vpe_priv, uint16_t rgb_lib[17 * 17 * 17 * 3], struct vpe_3dlut *params) { - - if (!enable_3dlut) { - params->state.bits.initialized = 0; - return true; - } - bool ret = false; struct vpe_rgb *rgb_area = NULL; int ind = 0; @@ -108,7 +102,6 @@ bool vpe_convert_to_tetrahedral(struct vpe_priv *vpe_priv, uint16_t rgb_lib[17 * } } convert_3dlut_to_tetrahedral_params(rgb_area, true, true, &params->lut_3d); - params->state.bits.initialized = 1; vpe_free(rgb_area); ret = true; diff --git a/src/amd/vpelib/src/core/color.c b/src/amd/vpelib/src/core/color.c index 7f57f774bec..65790547f52 100644 --- a/src/amd/vpelib/src/core/color.c +++ b/src/amd/vpelib/src/core/color.c @@ -49,9 +49,8 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv, struct transfer_func *output_tf); static bool color_update_degamma_tf(struct vpe_priv *vpe_priv, - enum color_transfer_func color_input_tf, struct fixed31_32 x_scale, - struct fixed31_32 y_scale, struct fixed31_32 y_bias, bool can_bypass, - struct transfer_func *input_tf); + enum color_transfer_func color_input_tf, struct fixed31_32 x_scale, struct fixed31_32 y_scale, + struct fixed31_32 y_bias, bool can_bypass, struct transfer_func *input_tf); static bool color_update_input_cs(struct vpe_priv *vpe_priv, enum color_space in_cs, const struct vpe_color_adjust *adjustments, struct vpe_csc_matrix *input_cs, @@ -153,10 +152,11 @@ static void color_check_input_cm_update(struct vpe_priv *vpe_priv, struct stream static bool 
color_update_regamma_tf(struct vpe_priv *vpe_priv, enum color_transfer_func output_transfer_function, struct fixed31_32 x_scale, struct fixed31_32 y_scale, struct fixed31_32 y_bias, bool can_bypass, - struct transfer_func* output_tf) + struct transfer_func *output_tf) { struct pwl_params *params = NULL; bool ret = true; + bool update = false; if (can_bypass || output_transfer_function == TRANSFER_FUNC_HLG) { output_tf->type = TF_TYPE_BYPASS; @@ -164,9 +164,9 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv, } output_tf->sdr_ref_white_level = 80; - output_tf->cm_gamma_type = CM_REGAM; - output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - output_tf->start_base = y_bias; + output_tf->cm_gamma_type = CM_REGAM; + output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + output_tf->start_base = y_bias; switch (output_transfer_function) { case TRANSFER_FUNC_SRGB: @@ -181,19 +181,41 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv, break; } - ret = vpe_color_calculate_regamma_params( - vpe_priv, x_scale, y_scale, &vpe_priv->cal_buffer, output_tf); + if (vpe_priv->init.debug.disable_lut_caching || + (output_tf->cache_info.cm_gamma_type != output_tf->cm_gamma_type) || + (output_tf->cache_info.tf != output_tf->tf) || + (output_tf->cache_info.x_scale.value != x_scale.value) || + (output_tf->cache_info.y_scale.value != y_scale.value) || + (output_tf->cache_info.y_bias.value != y_bias.value)) { + // if gamma points have been previously generated, + // skip the re-gen no matter it was config cached or not + update = true; + } + if (update) { + ret = vpe_color_calculate_regamma_params( + vpe_priv, x_scale, y_scale, &vpe_priv->cal_buffer, output_tf); + if (ret) { + // reset the cache status and mark as dirty to let hw layer to re-cache + output_tf->dirty = true; + output_tf->config_cache.cached = false; + output_tf->cache_info.cm_gamma_type = output_tf->cm_gamma_type; + output_tf->cache_info.tf = output_tf->tf; + output_tf->cache_info.x_scale = x_scale; + 
output_tf->cache_info.y_scale = y_scale; + output_tf->cache_info.y_bias = y_bias; + } + } return ret; } static bool color_update_degamma_tf(struct vpe_priv *vpe_priv, - enum color_transfer_func color_input_tf, struct fixed31_32 x_scale, - struct fixed31_32 y_scale, struct fixed31_32 y_bias, bool can_bypass, - struct transfer_func *input_tf) + enum color_transfer_func color_input_tf, struct fixed31_32 x_scale, struct fixed31_32 y_scale, + struct fixed31_32 y_bias, bool can_bypass, struct transfer_func *input_tf) { bool ret = true; struct pwl_params *params = NULL; + bool update = false; if (can_bypass || color_input_tf == TRANSFER_FUNC_HLG) { input_tf->type = TF_TYPE_BYPASS; @@ -201,8 +223,8 @@ static bool color_update_degamma_tf(struct vpe_priv *vpe_priv, } input_tf->cm_gamma_type = CM_DEGAM; - input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - input_tf->start_base = y_bias; + input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + input_tf->start_base = y_bias; switch (color_input_tf) { case TRANSFER_FUNC_SRGB: @@ -218,13 +240,36 @@ static bool color_update_degamma_tf(struct vpe_priv *vpe_priv, break; } - ret = vpe_color_calculate_degamma_params(vpe_priv, x_scale, y_scale, input_tf); + if (vpe_priv->init.debug.disable_lut_caching || + (input_tf->cache_info.cm_gamma_type != input_tf->cm_gamma_type) || + (input_tf->cache_info.tf != input_tf->tf) || + (input_tf->cache_info.x_scale.value != x_scale.value) || + (input_tf->cache_info.y_scale.value != y_scale.value) || + (input_tf->cache_info.y_bias.value != y_bias.value)) { + // if gamma points have been previously generated, + // skip the re-gen no matter it was config cached or not + update = true; + } + if (update) { + ret = vpe_color_calculate_degamma_params(vpe_priv, x_scale, y_scale, input_tf); + if (ret) { + // reset the cache status and mark as dirty to let hw layer to re-cache + input_tf->dirty = true; + input_tf->config_cache.cached = false; + input_tf->cache_info.cm_gamma_type = input_tf->cm_gamma_type; + 
input_tf->cache_info.tf = color_input_tf; + input_tf->cache_info.x_scale = x_scale; + input_tf->cache_info.y_scale = y_scale; + input_tf->cache_info.y_bias = y_bias; + } + } return ret; } -static enum vpe_status vpe_allocate_cm_memory(struct vpe_priv *vpe_priv, const struct vpe_build_param *param) { - +static enum vpe_status vpe_allocate_cm_memory( + struct vpe_priv *vpe_priv, const struct vpe_build_param *param) +{ struct stream_ctx *stream_ctx; struct output_ctx *output_ctx; enum vpe_status status = VPE_STATUS_OK; @@ -242,8 +287,7 @@ static enum vpe_status vpe_allocate_cm_memory(struct vpe_priv *vpe_priv, const s } if (!stream_ctx->input_tf) { - stream_ctx->input_tf = - (struct transfer_func *)vpe_zalloc(sizeof(struct transfer_func)); + stream_ctx->input_tf = (struct transfer_func *)vpe_zalloc(sizeof(struct transfer_func)); if (!stream_ctx->input_tf) { vpe_log("err: out of memory for input tf!"); return VPE_STATUS_NO_MEMORY; @@ -277,8 +321,7 @@ static enum vpe_status vpe_allocate_cm_memory(struct vpe_priv *vpe_priv, const s output_ctx = &vpe_priv->output_ctx; if (!output_ctx->output_tf) { - output_ctx->output_tf = - (struct transfer_func *)vpe_zalloc(sizeof(struct transfer_func)); + output_ctx->output_tf = (struct transfer_func *)vpe_zalloc(sizeof(struct transfer_func)); if (!output_ctx->output_tf) { vpe_log("err: out of memory for output tf!"); return VPE_STATUS_NO_MEMORY; @@ -379,8 +422,7 @@ static enum vpe_status vpe_update_blnd_gamma(struct vpe_priv *vpe_priv, const struct vpe_build_param *param, const struct vpe_stream *stream, struct transfer_func *blnd_tf) { - - struct output_ctx *output_ctx; + struct output_ctx *output_ctx; struct vpe_color_space tm_out_cs; struct fixed31_32 x_scale = vpe_fixpt_one; struct fixed31_32 y_scale = vpe_fixpt_one; @@ -622,6 +664,28 @@ enum vpe_status vpe_color_build_tm_cs(const struct vpe_tonemap_params *tm_params return VPE_STATUS_OK; } +enum vpe_status vpe_color_update_3dlut( + struct vpe_priv *vpe_priv, struct stream_ctx 
*stream_ctx, bool enable_3dlut) +{ + if (!enable_3dlut) { + stream_ctx->lut3d_func->state.bits.initialized = 0; + } else { + if (vpe_priv->init.debug.disable_lut_caching || + (stream_ctx->lut3d_func->cache_info.uid_3dlut != stream_ctx->stream.tm_params.UID)) { + vpe_convert_to_tetrahedral( + vpe_priv, stream_ctx->stream.tm_params.lut_data, stream_ctx->lut3d_func); + stream_ctx->lut3d_func->dirty = true; + stream_ctx->lut3d_func->config_cache.cached = false; + stream_ctx->lut3d_func->cache_info.uid_3dlut = stream_ctx->stream.tm_params.UID; + } + stream_ctx->lut3d_func->state.bits.initialized = 1; + } + + stream_ctx->uid_3dlut = stream_ctx->stream.tm_params.UID; + + return VPE_STATUS_OK; +} + enum vpe_status vpe_color_update_color_space_and_tf( struct vpe_priv *vpe_priv, const struct vpe_build_param *param) { @@ -730,22 +794,44 @@ enum vpe_status vpe_color_tm_update_hdr_mult(uint16_t shaper_in_exp_max, uint32_ return VPE_STATUS_OK; } -enum vpe_status vpe_color_update_shaper( - uint16_t shaper_in_exp_max, struct transfer_func *shaper_func, bool enable_3dlut) - +enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_t shaper_in_exp_max, + struct transfer_func *shaper_func, bool enable_3dlut) { + enum color_transfer_func tf = TRANSFER_FUNC_LINEAR; + bool update = false; + enum vpe_status ret = VPE_STATUS_OK; + + VPE_ASSERT(shaper_func != NULL); + if (!enable_3dlut) { shaper_func->type = TF_TYPE_BYPASS; return VPE_STATUS_OK; } - struct vpe_shaper_setup_in shaper_in; - shaper_in.shaper_in_max = 1 << 16; - shaper_in.use_const_hdr_mult = false; // can't be true. Fix is required. + // right now shaper is always programmed with linear, once cached, it is always reused. 
+ if (vpe_priv->init.debug.disable_lut_caching || + (shaper_func && shaper_func->cache_info.tf != tf)) { + // if the caching has the required data cached, skip the update + update = true; + } shaper_func->type = TF_TYPE_HWPWL; - shaper_func->tf = TRANSFER_FUNC_LINEAR; - return vpe_build_shaper(&shaper_in, &shaper_func->pwl); + shaper_func->tf = tf; + + if (update) { + struct vpe_shaper_setup_in shaper_in; + + shaper_in.shaper_in_max = 1 << 16; + shaper_in.use_const_hdr_mult = false; // can't be true. Fix is required. + + ret = vpe_build_shaper(&shaper_in, &shaper_func->pwl); + if (ret == VPE_STATUS_OK) { + shaper_func->dirty = true; + shaper_func->config_cache.cached = false; + shaper_func->cache_info.tf = tf; + } + } + return ret; } enum vpe_status vpe_color_update_movable_cm( @@ -796,19 +882,6 @@ enum vpe_status vpe_color_update_movable_cm( } } - if (enable_3dlut) { - if (!stream_ctx->lut3d_cache) { // setup cache if needed - stream_ctx->lut3d_cache = vpe_zalloc(sizeof(struct vpe_3dlut_cache)); - if (!stream_ctx->lut3d_cache) { - vpe_log("err: out of memory for 3d lut cache!"); - ret = VPE_STATUS_NO_MEMORY; - goto exit; - } - stream_ctx->lut3d_cache->uid = 0; - } - // 3D Lut updated, invalid cache - } - if (!output_ctx->gamut_remap) { output_ctx->gamut_remap = vpe_zalloc(sizeof(struct colorspace_transform)); if (!output_ctx->gamut_remap) { @@ -818,14 +891,13 @@ enum vpe_status vpe_color_update_movable_cm( } } - //Blendgam is updated by output vpe_update_output_gamma_sequence - get_shaper_norm_factor(&stream_ctx->stream.tm_params, stream_ctx, &shaper_norm_factor); vpe_color_tm_update_hdr_mult(SHAPER_EXP_MAX_IN, shaper_norm_factor, &stream_ctx->lut3d_func->hdr_multiplier, enable_3dlut); - vpe_color_update_shaper(SHAPER_EXP_MAX_IN, stream_ctx->in_shaper_func, enable_3dlut); + vpe_color_update_shaper( + vpe_priv, SHAPER_EXP_MAX_IN, stream_ctx->in_shaper_func, enable_3dlut); vpe_color_build_tm_cs(&stream_ctx->stream.tm_params, vpe_priv->output_ctx.surface, 
&tm_out_cs); @@ -834,12 +906,7 @@ enum vpe_status vpe_color_update_movable_cm( vpe_color_update_gamut(vpe_priv, out_lut_cs, vpe_priv->output_ctx.cs, output_ctx->gamut_remap, !enable_3dlut); - if ((enable_3dlut && !stream_ctx->stream.tm_params.UID) || - stream_ctx->lut3d_cache->uid != stream_ctx->stream.tm_params.UID) - vpe_convert_to_tetrahedral(vpe_priv, stream_ctx->stream.tm_params.lut_data, - stream_ctx->lut3d_func, enable_3dlut); - - stream_ctx->uid_3dlut = stream_ctx->stream.tm_params.UID; + vpe_color_update_3dlut(vpe_priv, stream_ctx, enable_3dlut); } } exit: diff --git a/src/amd/vpelib/src/core/inc/3dlut_builder.h b/src/amd/vpelib/src/core/inc/3dlut_builder.h index e82814fa22d..e6a7164e26b 100644 --- a/src/amd/vpelib/src/core/inc/3dlut_builder.h +++ b/src/amd/vpelib/src/core/inc/3dlut_builder.h @@ -30,5 +30,5 @@ #define LUT3D_SIZE_17x17x17 4913 #define LUT3D_SIZE_9x9x9 729 -bool vpe_convert_to_tetrahedral(struct vpe_priv *vpe_priv, uint16_t rgb[17 * 17 * 17 * 3], - struct vpe_3dlut *params, bool enable_3dlut); +bool vpe_convert_to_tetrahedral( + struct vpe_priv *vpe_priv, uint16_t rgb[17 * 17 * 17 * 3], struct vpe_3dlut *params); diff --git a/src/amd/vpelib/src/core/inc/color.h b/src/amd/vpelib/src/core/inc/color.h index e848d2f23e7..b2d084a9e39 100644 --- a/src/amd/vpelib/src/core/inc/color.h +++ b/src/amd/vpelib/src/core/inc/color.h @@ -27,17 +27,18 @@ #include "vpe_types.h" #include "fixed31_32.h" #include "hw_shared.h" +#include "config_cache.h" #ifdef __cplusplus extern "C" { #endif -#define SDR_VIDEO_WHITE_POINT 100 // nits -#define SDR_WHITE_POINT 80 // nits -#define HDR_PEAK_WHITE 10000 -#define CCCS_NORM HDR_PEAK_WHITE/SDR_WHITE_POINT -#define STUDIO_RANGE_FOOT_ROOM_10_BIT vpe_fixpt_from_fraction(64, 1023) -#define STUDIO_RANGE_SCALE_10_BIT vpe_fixpt_from_fraction(940 - 64, 1023) +#define SDR_VIDEO_WHITE_POINT 100 // nits +#define SDR_WHITE_POINT 80 // nits +#define HDR_PEAK_WHITE 10000 +#define CCCS_NORM HDR_PEAK_WHITE / SDR_WHITE_POINT +#define 
STUDIO_RANGE_FOOT_ROOM_10_BIT vpe_fixpt_from_fraction(64, 1023) +#define STUDIO_RANGE_SCALE_10_BIT vpe_fixpt_from_fraction(940 - 64, 1023) #define STUDIO_RANGE_FOOT_ROOM_8_BIT vpe_fixpt_from_fraction(16, 255) #define STUDIO_RANGE_SCALE_8_BIT vpe_fixpt_from_fraction(235 - 16, 255) @@ -57,7 +58,7 @@ enum color_depth { COLOR_DEPTH_COUNT }; - enum color_transfer_func { +enum color_transfer_func { TRANSFER_FUNC_UNKNOWN, TRANSFER_FUNC_SRGB, TRANSFER_FUNC_BT709, @@ -150,7 +151,7 @@ struct transfer_func { enum transfer_func_type type; enum color_transfer_func tf; enum cm_type cm_gamma_type; - struct fixed31_32 start_base; //Used to clamp curve start + struct fixed31_32 start_base; // Used to clamp curve start /* FP16 1.0 reference level in nits, default is 80 nits, only for PQ*/ uint32_t sdr_ref_white_level; @@ -158,6 +159,18 @@ struct transfer_func { struct pwl_params pwl; struct transfer_func_distributed_points tf_pts; }; + + // the followings are for optimization: skip if no change + bool dirty; /*< indicate this object is updated or not */ + struct config_cache config_cache; /*< used by the hw hook layer to do the caching */ + + struct { + enum color_transfer_func tf; + enum cm_type cm_gamma_type; + struct fixed31_32 x_scale; + struct fixed31_32 y_scale; + struct fixed31_32 y_bias; + } cache_info; }; enum color_white_point_type { @@ -211,7 +224,7 @@ struct color_gamut_data { union vpe_3dlut_state { struct { - uint32_t initialized : 1; /*if 3dlut is went through color module for initialization */ + uint32_t initialized : 1; /*< if 3dlut is went through color module for initialization */ uint32_t reserved : 15; } bits; uint32_t raw; @@ -222,6 +235,14 @@ struct vpe_3dlut { struct tetrahedral_params lut_3d; struct fixed31_32 hdr_multiplier; union vpe_3dlut_state state; + + // the followings are for optimization: skip if no change + bool dirty; /*< indicate this object is updated or not */ + struct config_cache config_cache; /*< used by the hw hook layer to do the caching 
*/ + + struct { + uint64_t uid_3dlut; /*< UID for current 3D LUT params */ + } cache_info; }; enum vpe_status vpe_color_update_color_space_and_tf( @@ -247,12 +268,15 @@ enum vpe_status vpe_color_update_whitepoint( enum vpe_status vpe_color_tm_update_hdr_mult(uint16_t shaper_in_exp_max, uint32_t peak_white, struct fixed31_32 *hdr_multiplier, bool enable_3dlut); -enum vpe_status vpe_color_update_shaper( - uint16_t shaper_in_exp_max, struct transfer_func *shaper_func, bool enable_3dlut); +enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_t shaper_in_exp_max, + struct transfer_func *shaper_func, bool enable_3dlut); enum vpe_status vpe_color_build_tm_cs(const struct vpe_tonemap_params *tm_params, struct vpe_surface_info surface_info, struct vpe_color_space *vcs); +enum vpe_status vpe_color_update_3dlut( + struct vpe_priv *vpe_priv, struct stream_ctx *stream_ctx, bool enable_3dlut); + #ifdef __cplusplus } #endif diff --git a/src/amd/vpelib/src/core/inc/config_cache.h b/src/amd/vpelib/src/core/inc/config_cache.h new file mode 100644 index 00000000000..5ca6c389311 --- /dev/null +++ b/src/amd/vpelib/src/core/inc/config_cache.h @@ -0,0 +1,154 @@ +/* Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#pragma once + +#include <stdint.h> +#include <stdbool.h> +#include "config_writer.h" + +/** To use this config caching helper, there are pre-requisites: + * The object that passes to the hw programming layer must have the following members in its + * structure + * 1. struct config_cache config_cache; + * 2. bool dirty; + * + * e.g. + * struct transfer_function { + * bool dirty; + * struct config_cache config_cache; + * }; + * + * The upper layer has to indicate this object is dirty or not for the hw programming layer to + * determine i. re-use the config cache? ii. cache the new settings? + * + * Before using the CONFIG_CACHE(), make sure the function has these local variables visible in the + * same code block: + * 1. struct config_writer *config_writer + * - usually declared with PROGRAM_ENTRY() + * 2. a debug option that says whether caching is disabled + * 3. an input object that has the config_cache member + * 4. the hw programming function that would generate command buffer content + * 5. 
the object that has num_configs which stores the generated configs
+ *
+ * Inside this CONFIG_CACHE macro it will clear the dirty bit after consuming the settings
+ *
+ * Make sure to free up this cache object when the parent object is destroyed using
+ * CONFIG_CACHE_FREE()
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct vpe_priv;
+
+/* a common config cache structure to be included in the object that is passed to the program
+ * hardware API layer
+ */
+struct config_cache {
+    uint8_t *p_buffer; /* copy of the last generated config packet content */
+    uint64_t size;     /* number of bytes stored in p_buffer */
+    bool     cached;   /* true when p_buffer holds content that can be re-applied */
+};
+
+
+/* A macro that helps cache the config packet, it won't cache if it is in bypass mode
+ * as bypass mode is not heavy lifting programming. A dirty object always refreshes its cache.
+ *
+ * @param obj_cache         an object that has the config_cache and dirty members, may be NULL
+ * @param obj_cfg_array     an object that contains the configs and num_configs member
+ * @param disable_cache     true to neither re-use nor store the cached config
+ * @param is_bypass         if it is in bypass, it doesn't cache the bypass config
+ * @param program_func_call the program call that generates the config packet content
+ */
+#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call) \
+    { \
+        bool use_cache = false; \
+        \
+        /* make sure it opens a new config packet */ \
+        config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \
+        \
+        if ((obj_cache) && !(disable_cache) && (obj_cache)->config_cache.p_buffer && \
+            (obj_cache)->config_cache.cached && !((obj_cache)->dirty) && !(is_bypass)) { \
+            /* reuse the cache */ \
+            if (config_writer->buf->size >= (obj_cache)->config_cache.size) { \
+                memcpy((void *)(uintptr_t)config_writer->base_cpu_va, \
+                    (obj_cache)->config_cache.p_buffer, \
+                    (size_t)(obj_cache)->config_cache.size); \
+                config_writer->buf->cpu_va = \
+                    config_writer->base_cpu_va + (obj_cache)->config_cache.size; \
+                config_writer->buf->gpu_va = \
+                    config_writer->base_gpu_va + (obj_cache)->config_cache.size; \
+                config_writer->buf->size -= ((obj_cache)->config_cache.size - sizeof(uint32_t)); \
+                use_cache = true; \
+            } \
+        } \
+        \
+        if (!use_cache) { \
+            uint64_t start, end; \
+            uint16_t config_num = (uint16_t)(obj_cfg_array)->num_configs; \
+            \
+            start = config_writer->base_cpu_va; \
+            program_func_call; \
+            end = config_writer->buf->cpu_va; \
+            if ((obj_cache) && !(disable_cache) && !(is_bypass)) { \
+                /* only cache when it is not crossing config packets */ \
+                if (config_num == (obj_cfg_array)->num_configs) { \
+                    if ((obj_cache)->dirty) { \
+                        uint64_t size = end - start; \
+                        /* grow the buffer only when the new content does not fit */ \
+                        if ((obj_cache)->config_cache.size < size) { \
+                            if ((obj_cache)->config_cache.p_buffer) \
+                                vpe_free((obj_cache)->config_cache.p_buffer); \
+                            (obj_cache)->config_cache.p_buffer = vpe_zalloc((size_t)size); \
+                        } \
+                        /* always refresh, a dirty object must not keep its stale packet */ \
+                        if ((obj_cache)->config_cache.p_buffer) { \
+                            memcpy((obj_cache)->config_cache.p_buffer, \
+                                (void *)(uintptr_t)start, (size_t)size); \
+                            (obj_cache)->config_cache.size   = size; \
+                            (obj_cache)->config_cache.cached = true; \
+                        } else { \
+                            (obj_cache)->config_cache.size   = 0; \
+                            (obj_cache)->config_cache.cached = false; \
+                        } \
+                    } \
+                } \
+            } \
+        } \
+        if ((obj_cache)) \
+            (obj_cache)->dirty = false; \
+    }
+
+/* the following macro requires a local variable vpe_priv to be present */
+#define CONFIG_CACHE_FREE(cache) \
+    { \
+        if ((cache).p_buffer) \
+            vpe_free((cache).p_buffer); \
+    }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/vpelib/src/core/inc/mpc.h b/src/amd/vpelib/src/core/inc/mpc.h index 710d9e096c7..5663dd36d6f 100644 --- a/src/amd/vpelib/src/core/inc/mpc.h +++ b/src/amd/vpelib/src/core/inc/mpc.h @@ -157,13 +157,13 @@ struct mpc_funcs { void (*set_output_transfer_func)(struct mpc *mpc, struct output_ctx *output_ctx); - void (*set_mpc_shaper_3dlut)(struct mpc *mpc, const struct transfer_func *func_shaper, - const struct vpe_3dlut *lut3d_func); + void (*set_mpc_shaper_3dlut)( + struct mpc *mpc, struct transfer_func *func_shaper, struct vpe_3dlut *lut3d_func); - void (*set_blend_lut)(struct mpc *mpc, const struct transfer_func *blend_tf); + void (*set_blend_lut)(struct mpc *mpc, struct 
transfer_func *blend_tf); - bool (*program_movable_cm)(struct mpc *mpc, const struct transfer_func *func_shaper, - const struct vpe_3dlut *lut3d_func, const struct transfer_func *blend_tf, bool afterblend); + bool (*program_movable_cm)(struct mpc *mpc, struct transfer_func *func_shaper, + struct vpe_3dlut *lut3d_func, struct transfer_func *blend_tf, bool afterblend); void (*program_crc)(struct mpc *mpc, bool enable); }; diff --git a/src/amd/vpelib/src/core/inc/vpe_priv.h b/src/amd/vpelib/src/core/inc/vpe_priv.h index 749d031b5aa..2129a9d2069 100644 --- a/src/amd/vpelib/src/core/inc/vpe_priv.h +++ b/src/amd/vpelib/src/core/inc/vpe_priv.h @@ -56,6 +56,8 @@ extern "C" { #define MAX_LINE_SIZE 1024 // without 16 pixels for the seams #define MAX_LINE_CNT 4 +#define MAX_NUM_SAVED_CONFIG 16 + enum vpe_cmd_ops { VPE_CMD_OPS_BLENDING, VPE_CMD_OPS_BG, @@ -117,14 +119,6 @@ struct config_record { uint64_t config_size; }; -#define VPE_3DLUT_CACHE_SIZE 81920 - -struct vpe_3dlut_cache { - uint64_t uid; - uint8_t cache_buf[VPE_3DLUT_CACHE_SIZE]; - uint64_t buffer_size; -}; - /** represents a stream input, i.e. 
common to all segments */ struct stream_ctx { struct vpe_priv *vpe_priv; @@ -139,8 +133,8 @@ struct stream_ctx { uint16_t num_configs; // shared among same stream uint16_t num_stream_op_configs[VPE_CMD_TYPE_COUNT]; // shared among same cmd type within the // same stream - struct config_record configs[16]; - struct config_record stream_op_configs[VPE_CMD_TYPE_COUNT][16]; + struct config_record configs[MAX_NUM_SAVED_CONFIG]; + struct config_record stream_op_configs[VPE_CMD_TYPE_COUNT][MAX_NUM_SAVED_CONFIG]; // cached color properties bool per_pixel_alpha; @@ -167,14 +161,11 @@ struct stream_ctx { struct colorspace_transform *gamut_remap; struct transfer_func *in_shaper_func; // for shaper lut struct vpe_3dlut *lut3d_func; // for 3dlut - struct vpe_3dlut_cache *lut3d_cache; // for 3dlut cache struct transfer_func *blend_tf; // for 1dlut white_point_gain white_point_gain; - - bool flip_horizonal_output; - struct vpe_color_adjust color_adjustments; // stores the current color adjustments params - struct fixed31_32 - tf_scaling_factor; // a scaling factor that acts as a gain on the transfer function + bool flip_horizonal_output; + struct vpe_color_adjust color_adjustments; // stores the current color adjustments params + struct fixed31_32 tf_scaling_factor; // a gain applied on a transfer function }; struct output_ctx { @@ -190,7 +181,7 @@ struct output_ctx { enum color_space cs; uint32_t num_configs; - struct config_record configs[8]; + struct config_record configs[MAX_NUM_SAVED_CONFIG]; union { struct { diff --git a/src/amd/vpelib/src/core/resource.c b/src/amd/vpelib/src/core/resource.c index 3f3a5b782ea..afb5def0af7 100644 --- a/src/amd/vpelib/src/core/resource.c +++ b/src/amd/vpelib/src/core/resource.c @@ -36,20 +36,20 @@ #endif static const struct vpe_debug_options debug_defaults = { - .flags = {0}, - .cm_in_bypass = 0, - .vpcnvc_bypass = 0, - .mpc_bypass = 0, - .identity_3dlut = 0, - .sce_3dlut = 0, - .disable_reuse_bit = 0, - .bg_bit_depth = 0, - .bypass_gamcor = 
0, - .bypass_ogam = 0, - .bypass_dpp_gamut_remap = 0, - .bypass_post_csc = 0, - .bg_color_fill_only = 0, - .assert_when_not_support = 0, + .flags = {0}, + .cm_in_bypass = 0, + .vpcnvc_bypass = 0, + .mpc_bypass = 0, + .identity_3dlut = 0, + .sce_3dlut = 0, + .disable_reuse_bit = 0, + .bg_bit_depth = 0, + .bypass_gamcor = 0, + .bypass_ogam = 0, + .bypass_dpp_gamut_remap = 0, + .bypass_post_csc = 0, + .bg_color_fill_only = 0, + .assert_when_not_support = 0, .enable_mem_low_power = { .bits = @@ -77,8 +77,8 @@ static const struct vpe_debug_options debug_defaults = { .mpc_crc_ctrl = 0, .visual_confirm_params = {{{0}}}, .skip_optimal_tap_check = 0, - .disable_3dlut_cache = 0, - .bypass_blndgam = 0 + .disable_lut_caching = 0, + .bypass_blndgam = 0, }; enum vpe_ip_level vpe_resource_parse_ip_version( @@ -199,6 +199,7 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) for (i = 0; i < vpe_priv->num_streams; i++) { ctx = &vpe_priv->stream_ctx[i]; if (ctx->input_tf) { + CONFIG_CACHE_FREE(ctx->input_tf->config_cache); vpe_free(ctx->input_tf); ctx->input_tf = NULL; } @@ -219,25 +220,23 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) } if (ctx->in_shaper_func) { + CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache); vpe_free(ctx->in_shaper_func); ctx->in_shaper_func = NULL; } if (ctx->blend_tf) { + CONFIG_CACHE_FREE(ctx->blend_tf->config_cache); vpe_free(ctx->blend_tf); ctx->blend_tf = NULL; } if (ctx->lut3d_func) { + CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache); vpe_free(ctx->lut3d_func); ctx->lut3d_func = NULL; } - if (ctx->lut3d_cache) { - vpe_free(ctx->lut3d_cache); - ctx->lut3d_cache = NULL; - } - if (ctx->segment_ctx) { vpe_free(ctx->segment_ctx); ctx->segment_ctx = NULL; diff --git a/src/amd/vpelib/src/core/vpelib.c b/src/amd/vpelib/src/core/vpelib.c index e5fd99575ad..157b8c28f2f 100644 --- a/src/amd/vpelib/src/core/vpelib.c +++ b/src/amd/vpelib/src/core/vpelib.c @@ -124,8 +124,8 @@ static void override_debug_option( if (user_debug->flags.bypass_blndgam) 
debug->bypass_blndgam = user_debug->bypass_blndgam; - if (user_debug->flags.disable_3dlut_cache) - debug->disable_3dlut_cache = user_debug->disable_3dlut_cache; + if (user_debug->flags.disable_lut_caching) + debug->disable_lut_caching = user_debug->disable_lut_caching; } #ifdef VPE_BUILD_1_1