From ded1a2b3f0e7e45cce609e973b131833dece3710 Mon Sep 17 00:00:00 2001 From: Brendan Date: Thu, 27 Jun 2024 14:56:54 -0400 Subject: [PATCH] amd/vpelib: Multiple instance support in caching framework Generalize the caching to work with multiple instances of objects. Change some static functions to public functions to maximize function re-use possibilities. Reviewed-by: Roy Chan Acked-by: Chih-Wei Chien Signed-off-by: Brendan Part-of: --- src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h | 63 ++++++++++ .../vpelib/src/chip/vpe10/vpe10_cm_common.c | 4 +- src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c | 4 +- .../vpelib/src/chip/vpe10/vpe10_dpp_dscl.c | 94 +++++---------- src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c | 8 +- src/amd/vpelib/src/core/color.c | 113 +++++++++++------- src/amd/vpelib/src/core/inc/cdc.h | 1 + src/amd/vpelib/src/core/inc/color.h | 26 ++-- src/amd/vpelib/src/core/inc/config_cache.h | 43 +++---- src/amd/vpelib/src/core/inc/dpp.h | 1 + src/amd/vpelib/src/core/inc/hw_shared.h | 5 + src/amd/vpelib/src/core/inc/mpc.h | 1 + src/amd/vpelib/src/core/inc/opp.h | 1 + src/amd/vpelib/src/core/inc/resource.h | 3 +- src/amd/vpelib/src/core/resource.c | 13 +- 15 files changed, 222 insertions(+), 158 deletions(-) diff --git a/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h b/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h index 164eb633cb4..65ae0375ca9 100644 --- a/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h +++ b/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h @@ -887,6 +887,69 @@ void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data); +/*Scalar helper functions*/ +enum vpe10_coef_filter_type_sel { + SCL_COEF_LUMA_VERT_FILTER = 0, + SCL_COEF_LUMA_HORZ_FILTER = 1, + SCL_COEF_CHROMA_VERT_FILTER = 2, + SCL_COEF_CHROMA_HORZ_FILTER = 3, + SCL_COEF_ALPHA_VERT_FILTER = 4, + SCL_COEF_ALPHA_HORZ_FILTER = 5, +}; + +enum vpe10_dscl_autocal_mode { + AUTOCAL_MODE_OFF = 0, + + 
/* Autocal calculates the scaling ratio and initial phase and the + * DSCL_MODE_SEL must be set to 1 + */ + AUTOCAL_MODE_AUTOSCALE = 1, + /* Autocal performs auto centering without replication and the + * DSCL_MODE_SEL must be set to 0 + */ + AUTOCAL_MODE_AUTOCENTER = 2, + /* Autocal performs auto centering and auto replication and the + * DSCL_MODE_SEL must be set to 0 + */ + AUTOCAL_MODE_AUTOREPLICATE = 3 +}; + +enum vpe10_dscl_mode_sel { + DSCL_MODE_SCALING_444_BYPASS = 0, + DSCL_MODE_SCALING_444_RGB_ENABLE = 1, + DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, + DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3, + DSCL_MODE_SCALING_420_LUMA_BYPASS = 4, + DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5, + DSCL_MODE_DSCL_BYPASS = 6 +}; +void vpe10_dpp_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end); + +void vpe10_dpp_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end); + +void vpe10_dpp_power_on_dscl(struct dpp *dpp, bool power_on); + +void vpe10_dpp_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params, + enum lb_memory_config mem_size_config); + +void vpe10_dpp_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data); + +void vpe10_dpp_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data); + +void vpe10_dpp_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data, + enum vpe10_dscl_mode_sel scl_mode, bool chroma_coef_mode); + +void vpe10_dpp_dscl_set_dscl_mode(struct dpp *dpp, enum vpe10_dscl_mode_sel dscl_mode); + +enum vpe10_dscl_mode_sel vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data *data); + +void vpe10_dpp_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps, + enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter); + +bool vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format); + +void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params); + uint32_t vpe10_get_line_buffer_size(void); bool vpe10_dpp_validate_number_of_taps(struct dpp *dpp, 
struct scaler_data *scl_data); diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c b/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c index 349de86da03..7ade007b378 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c @@ -233,7 +233,7 @@ bool vpe10_cm_helper_translate_curve_to_hw_format( uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS || - !output_tf->dirty) + (!output_tf->dirty && (lut_params->hw_points_num != 0))) return false; corner_points = lut_params->corner_points; @@ -419,7 +419,7 @@ bool vpe10_cm_helper_translate_curve_to_degamma_hw_format( uint32_t k, seg_distr[MAX_REGIONS_NUMBER_DEGAMMA], num_segments, hw_points; if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS || - !output_tf->dirty) + (!output_tf->dirty && (lut_params->hw_points_num != 0))) return false; corner_points = lut_params->corner_points; diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c index 053f5ef006d..14b3bbf338e 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c @@ -132,7 +132,7 @@ static void vpe10_dpp_program_gammcor_lut( } } -static void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params) +void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params) { struct vpe10_xfer_func_reg gam_regs = {0}; @@ -204,7 +204,7 @@ void vpe10_dpp_program_input_transfer_func(struct dpp *dpp, struct transfer_func bypass = ((input_tf->type == TF_TYPE_BYPASS) || dpp->vpe_priv->init.debug.bypass_gamcor); CONFIG_CACHE(input_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, - vpe10_dpp_program_gamcor_lut(dpp, params)); + vpe10_dpp_program_gamcor_lut(dpp, params), dpp->inst); } void vpe10_dpp_program_gamut_remap(struct dpp 
*dpp, struct colorspace_transform *gamut_remap) diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c index efd575346d5..2330d46a286 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c @@ -34,43 +34,7 @@ #define LB_MAX_PARTITION 12 -enum vpe10_coef_filter_type_sel { - SCL_COEF_LUMA_VERT_FILTER = 0, - SCL_COEF_LUMA_HORZ_FILTER = 1, - SCL_COEF_CHROMA_VERT_FILTER = 2, - SCL_COEF_CHROMA_HORZ_FILTER = 3, - SCL_COEF_ALPHA_VERT_FILTER = 4, - SCL_COEF_ALPHA_HORZ_FILTER = 5 -}; - -enum dscl_autocal_mode { - AUTOCAL_MODE_OFF = 0, - - /* Autocal calculate the scaling ratio and initial phase and the - * DSCL_MODE_SEL must be set to 1 - */ - AUTOCAL_MODE_AUTOSCALE = 1, - /* Autocal perform auto centering without replication and the - * DSCL_MODE_SEL must be set to 0 - */ - AUTOCAL_MODE_AUTOCENTER = 2, - /* Autocal perform auto centering and auto replication and the - * DSCL_MODE_SEL must be set to 0 - */ - AUTOCAL_MODE_AUTOREPLICATE = 3 -}; - -enum dscl_mode_sel { - DSCL_MODE_SCALING_444_BYPASS = 0, - DSCL_MODE_SCALING_444_RGB_ENABLE = 1, - DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, - DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3, - DSCL_MODE_SCALING_420_LUMA_BYPASS = 4, - DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5, - DSCL_MODE_DSCL_BYPASS = 6 -}; - -static bool dpp1_dscl_is_ycbcr(const enum vpe_surface_pixel_format format) +bool vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format) { return format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN && format <= VPE_SURFACE_PIXEL_FORMAT_VIDEO_END; @@ -82,7 +46,7 @@ static bool dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format fo format <= VPE_SURFACE_PIXEL_FORMAT_SUBSAMPLE_END); } -static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data) +enum vpe10_dscl_mode_sel vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data *data) { // TODO Check if bypass bit enabled @@ -92,7 +56,7 @@ static 
enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) return DSCL_MODE_DSCL_BYPASS; - if (!dpp1_dscl_is_ycbcr(data->format)) + if (!vpe10_dpp_dscl_is_ycbcr(data->format)) return DSCL_MODE_SCALING_444_RGB_ENABLE; if (!dpp1_dscl_is_video_subsampled(data->format)) @@ -104,7 +68,7 @@ static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data return DSCL_MODE_SCALING_420_YCBCR_ENABLE; } -static void dpp1_dscl_set_dscl_mode(struct dpp *dpp, enum dscl_mode_sel dscl_mode) +void vpe10_dpp_dscl_set_dscl_mode(struct dpp *dpp, enum vpe10_dscl_mode_sel dscl_mode) { PROGRAM_ENTRY(); @@ -130,21 +94,21 @@ static void dpp1_dscl_set_mpc_size(struct dpp *dpp, const struct scaler_data *sc REG_SET_2(VPMPC_SIZE, 0, VPMPC_WIDTH, scl_data->h_active, VPMPC_HEIGHT, scl_data->v_active); } -static void dpp1_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end) +void vpe10_dpp_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end) { PROGRAM_ENTRY(); REG_SET_2(VPOTG_H_BLANK, 0, OTG_H_BLANK_END, end, OTG_H_BLANK_START, start); } -static void dpp1_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end) +void vpe10_dpp_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end) { PROGRAM_ENTRY(); REG_SET_2(VPOTG_V_BLANK, 0, OTG_V_BLANK_END, end, OTG_V_BLANK_START, start); } -static void dpp1_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data) +void vpe10_dpp_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data) { PROGRAM_ENTRY(); @@ -172,7 +136,7 @@ static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_ } } -static void dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps, +void vpe10_dpp_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps, enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter) { const int tap_pairs = (taps + 1) / 2; @@ -206,8 +170,8 @@ static void 
dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps, } } -static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data, - enum dscl_mode_sel scl_mode, bool chroma_coef_mode) +void vpe10_dpp_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data, + enum vpe10_dscl_mode_sel scl_mode, bool chroma_coef_mode) { const uint16_t *filter_h = NULL; @@ -228,11 +192,11 @@ static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data * filter_v = (const uint16_t *)&scl_data->polyphase_filter_coeffs->vert_polyphase_coeffs; } if (filter_h != NULL) - dpp1_dscl_set_scaler_filter( + vpe10_dpp_dscl_set_scaler_filter( dpp, scl_data->taps.h_taps, SCL_COEF_LUMA_HORZ_FILTER, filter_h); if (filter_v != NULL) - dpp1_dscl_set_scaler_filter( + vpe10_dpp_dscl_set_scaler_filter( dpp, scl_data->taps.v_taps, SCL_COEF_LUMA_VERT_FILTER, filter_v); if (chroma_coef_mode) { @@ -243,18 +207,18 @@ static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data * dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps_c, scl_data->ratios.vert_c); if (filter_h_c != NULL) - dpp1_dscl_set_scaler_filter( + vpe10_dpp_dscl_set_scaler_filter( dpp, scl_data->taps.h_taps_c, SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c); if (filter_v_c != NULL) - dpp1_dscl_set_scaler_filter( + vpe10_dpp_dscl_set_scaler_filter( dpp, scl_data->taps.v_taps_c, SCL_COEF_CHROMA_VERT_FILTER, filter_v_c); } REG_UPDATE(VPDSCL_MODE, SCL_CHROMA_COEF_MODE, chroma_coef_mode); } -static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params, +void vpe10_dpp_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params, enum lb_memory_config mem_size_config) { @@ -266,7 +230,7 @@ static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *l VPLB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, LB_MAX_PARTITIONS, LB_MAX_PARTITION); } -static void dpp1_dscl_set_scale_ratio(struct dpp *dpp, const struct 
scaler_data *data) +void vpe10_dpp_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data) { PROGRAM_ENTRY(); @@ -313,7 +277,7 @@ static void dpp1_dscl_set_scaler_position(struct dpp *dpp, const struct scaler_d VPDSCL_VERT_FILTER_INIT_C, 0, SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int); } -static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on) +void vpe10_dpp_power_on_dscl(struct dpp *dpp, bool power_on) { PROGRAM_ENTRY(); @@ -346,7 +310,7 @@ static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on) void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl_data) { - enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data); + enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data); dpp1_dscl_set_recout(dpp, &scl_data->recout); dpp1_dscl_set_mpc_size(dpp, scl_data); @@ -360,24 +324,24 @@ void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data) { - enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data); - bool ycbcr = dpp1_dscl_is_ycbcr(scl_data->format); + enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data); + bool ycbcr = vpe10_dpp_dscl_is_ycbcr(scl_data->format); - dpp1_dscl_set_h_blank(dpp, 1, 0); - dpp1_dscl_set_v_blank(dpp, 1, 0); + vpe10_dpp_dscl_set_h_blank(dpp, 1, 0); + vpe10_dpp_dscl_set_v_blank(dpp, 1, 0); if (dscl_mode != DSCL_MODE_DSCL_BYPASS) - dpp1_power_on_dscl(dpp, true); + vpe10_dpp_power_on_dscl(dpp, true); - dpp1_dscl_set_dscl_mode(dpp, dscl_mode); + vpe10_dpp_dscl_set_dscl_mode(dpp, dscl_mode); if (dscl_mode == DSCL_MODE_DSCL_BYPASS) { - dpp1_power_on_dscl(dpp, false); + vpe10_dpp_power_on_dscl(dpp, false); return; } - dpp1_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0); - dpp1_dscl_set_scale_ratio(dpp, scl_data); - dpp1_dscl_set_taps(dpp, scl_data); - dpp1_dscl_set_scl_filter(dpp, scl_data, dscl_mode, 
ycbcr); + vpe10_dpp_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0); + vpe10_dpp_dscl_set_scale_ratio(dpp, scl_data); + vpe10_dpp_dscl_set_taps(dpp, scl_data); + vpe10_dpp_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr); } diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c b/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c index 31b9d2d19ce..d419b26eaa2 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c @@ -1263,12 +1263,12 @@ void vpe10_mpc_set_mpc_shaper_3dlut( bypass = (!shaper_lut || (func_shaper && func_shaper->type == TF_TYPE_BYPASS)); CONFIG_CACHE(func_shaper, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, - mpc->funcs->program_shaper(mpc, shaper_lut)); + mpc->funcs->program_shaper(mpc, shaper_lut), mpc->inst); bypass = (!lut3d_func || !lut3d_func->state.bits.initialized); lut3d_params = (bypass) ? (NULL) : (&lut3d_func->lut_3d); CONFIG_CACHE(lut3d_func, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, - mpc->funcs->program_3dlut(mpc, lut3d_params)); + mpc->funcs->program_3dlut(mpc, lut3d_params), mpc->inst); return; } @@ -1298,7 +1298,7 @@ void vpe10_mpc_set_output_transfer_func(struct mpc *mpc, struct output_ctx *outp vpe_priv->init.debug.cm_in_bypass || vpe_priv->init.debug.bypass_ogam); CONFIG_CACHE(output_ctx->output_tf, output_ctx, vpe_priv->init.debug.disable_lut_caching, - bypass, mpc->funcs->set_output_gamma(mpc, params)); + bypass, mpc->funcs->set_output_gamma(mpc, params), mpc->inst); } void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf) @@ -1328,7 +1328,7 @@ void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf) ((!blend_tf) || (blend_tf->type == TF_TYPE_BYPASS) || vpe_priv->init.debug.bypass_blndgam); CONFIG_CACHE(blend_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass, - mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type)); + mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type), 
mpc->inst); } bool vpe10_mpc_program_movable_cm(struct mpc *mpc, struct transfer_func *func_shaper, diff --git a/src/amd/vpelib/src/core/color.c b/src/amd/vpelib/src/core/color.c index 3b5ea03a507..3e385b6858c 100644 --- a/src/amd/vpelib/src/core/color.c +++ b/src/amd/vpelib/src/core/color.c @@ -181,29 +181,33 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv, break; } - if (vpe_priv->init.debug.disable_lut_caching || - (output_tf->cache_info.cm_gamma_type != output_tf->cm_gamma_type) || - (output_tf->cache_info.tf != output_tf->tf) || - (output_tf->cache_info.x_scale.value != x_scale.value) || - (output_tf->cache_info.y_scale.value != y_scale.value) || - (output_tf->cache_info.y_bias.value != y_bias.value)) { - // if gamma points have been previously generated, - // skip the re-gen no matter it was config cached or not - update = true; + for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + if (vpe_priv->init.debug.disable_lut_caching || + (output_tf->cache_info[i].cm_gamma_type != output_tf->cm_gamma_type) || + (output_tf->cache_info[i].tf != output_tf->tf) || + (output_tf->cache_info[i].x_scale.value != x_scale.value) || + (output_tf->cache_info[i].y_scale.value != y_scale.value) || + (output_tf->cache_info[i].y_bias.value != y_bias.value)) { + // if gamma points have been previously generated, + // skip the re-gen no matter it was config cached or not + update = true; + } } if (update) { ret = vpe_color_calculate_regamma_params( vpe_priv, x_scale, y_scale, &vpe_priv->cal_buffer, output_tf); if (ret) { - // reset the cache status and mark as dirty to let hw layer to re-cache - output_tf->dirty = true; - output_tf->config_cache.cached = false; - output_tf->cache_info.cm_gamma_type = output_tf->cm_gamma_type; - output_tf->cache_info.tf = output_tf->tf; - output_tf->cache_info.x_scale = x_scale; - output_tf->cache_info.y_scale = y_scale; - output_tf->cache_info.y_bias = y_bias; + for (int i = 0; i < 
vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + // reset the cache status and mark as dirty to let hw layer to re-cache + output_tf->dirty[i] = true; + output_tf->config_cache[i].cached = false; + output_tf->cache_info[i].cm_gamma_type = output_tf->cm_gamma_type; + output_tf->cache_info[i].tf = output_tf->tf; + output_tf->cache_info[i].x_scale = x_scale; + output_tf->cache_info[i].y_scale = y_scale; + output_tf->cache_info[i].y_bias = y_bias; + } } } return ret; @@ -240,28 +244,32 @@ static bool color_update_degamma_tf(struct vpe_priv *vpe_priv, break; } - if (vpe_priv->init.debug.disable_lut_caching || - (input_tf->cache_info.cm_gamma_type != input_tf->cm_gamma_type) || - (input_tf->cache_info.tf != input_tf->tf) || - (input_tf->cache_info.x_scale.value != x_scale.value) || - (input_tf->cache_info.y_scale.value != y_scale.value) || - (input_tf->cache_info.y_bias.value != y_bias.value)) { - // if gamma points have been previously generated, - // skip the re-gen no matter it was config cached or not - update = true; + for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + if (vpe_priv->init.debug.disable_lut_caching || + (input_tf->cache_info[i].cm_gamma_type != input_tf->cm_gamma_type) || + (input_tf->cache_info[i].tf != input_tf->tf) || + (input_tf->cache_info[i].x_scale.value != x_scale.value) || + (input_tf->cache_info[i].y_scale.value != y_scale.value) || + (input_tf->cache_info[i].y_bias.value != y_bias.value)) { + // if gamma points have been previously generated, + // skip the re-gen no matter it was config cached or not + update = true; + } } if (update) { ret = vpe_color_calculate_degamma_params(vpe_priv, x_scale, y_scale, input_tf); if (ret) { - // reset the cache status and mark as dirty to let hw layer to re-cache - input_tf->dirty = true; - input_tf->config_cache.cached = false; - input_tf->cache_info.cm_gamma_type = input_tf->cm_gamma_type; - input_tf->cache_info.tf = color_input_tf; - input_tf->cache_info.x_scale = x_scale; - 
input_tf->cache_info.y_scale = y_scale; - input_tf->cache_info.y_bias = y_bias; + for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + // reset the cache status and mark as dirty to let hw layer to re-cache + input_tf->dirty[i] = true; + input_tf->config_cache[i].cached = false; + input_tf->cache_info[i].cm_gamma_type = input_tf->cm_gamma_type; + input_tf->cache_info[i].tf = color_input_tf; + input_tf->cache_info[i].x_scale = x_scale; + input_tf->cache_info[i].y_scale = y_scale; + input_tf->cache_info[i].y_bias = y_bias; + } } } return ret; @@ -673,13 +681,22 @@ enum vpe_status vpe_color_update_3dlut( if (!enable_3dlut) { stream_ctx->lut3d_func->state.bits.initialized = 0; } else { - if (vpe_priv->init.debug.disable_lut_caching || - (stream_ctx->lut3d_func->cache_info.uid_3dlut != stream_ctx->stream.tm_params.UID)) { + bool update = false; + + for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) + if (vpe_priv->init.debug.disable_lut_caching || + (stream_ctx->lut3d_func->cache_info[i].uid_3dlut != + stream_ctx->stream.tm_params.UID)) + update = true; + + if (update) { vpe_convert_to_tetrahedral( vpe_priv, stream_ctx->stream.tm_params.lut_data, stream_ctx->lut3d_func); - stream_ctx->lut3d_func->dirty = true; - stream_ctx->lut3d_func->config_cache.cached = false; - stream_ctx->lut3d_func->cache_info.uid_3dlut = stream_ctx->stream.tm_params.UID; + for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { + stream_ctx->lut3d_func->dirty[i] = true; + stream_ctx->lut3d_func->config_cache[i].cached = false; + stream_ctx->lut3d_func->cache_info[i].uid_3dlut = stream_ctx->stream.tm_params.UID; + } } stream_ctx->lut3d_func->state.bits.initialized = 1; } @@ -812,10 +829,12 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_ } // right now shaper is always programmed with linear, once cached, it is always reused. 
- if (vpe_priv->init.debug.disable_lut_caching || - (shaper_func && shaper_func->cache_info.tf != tf)) { - // if the caching has the required data cached, skip the update - update = true; + for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { + if (vpe_priv->init.debug.disable_lut_caching || + (shaper_func && shaper_func->cache_info[i].tf != tf)) { + // if the caching has the required data cached, skip the update + update = true; + } } shaper_func->type = TF_TYPE_HWPWL; @@ -829,9 +848,11 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_ ret = vpe_build_shaper(&shaper_in, &shaper_func->pwl); if (ret == VPE_STATUS_OK) { - shaper_func->dirty = true; - shaper_func->config_cache.cached = false; - shaper_func->cache_info.tf = tf; + for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { + shaper_func->dirty[i] = true; + shaper_func->config_cache[i].cached = false; + shaper_func->cache_info[i].tf = tf; + } } } return ret; diff --git a/src/amd/vpelib/src/core/inc/cdc.h b/src/amd/vpelib/src/core/inc/cdc.h index 7f519325228..154e2b42117 100644 --- a/src/amd/vpelib/src/core/inc/cdc.h +++ b/src/amd/vpelib/src/core/inc/cdc.h @@ -61,6 +61,7 @@ struct cdc_funcs { struct cdc { struct vpe_priv *vpe_priv; struct cdc_funcs *funcs; + unsigned int inst; }; #ifdef __cplusplus diff --git a/src/amd/vpelib/src/core/inc/color.h b/src/amd/vpelib/src/core/inc/color.h index b2d084a9e39..5640aea293c 100644 --- a/src/amd/vpelib/src/core/inc/color.h +++ b/src/amd/vpelib/src/core/inc/color.h @@ -147,6 +147,14 @@ struct transfer_func_distributed_points { uint16_t x_point_at_y1_blue; }; +struct cache_info { + enum color_transfer_func tf; + enum cm_type cm_gamma_type; + struct fixed31_32 x_scale; + struct fixed31_32 y_scale; + struct fixed31_32 y_bias; +}; + struct transfer_func { enum transfer_func_type type; enum color_transfer_func tf; @@ -161,16 +169,10 @@ struct transfer_func { }; // the followings are for optimization: 
skip if no change - bool dirty; /*< indicate this object is updated or not */ - struct config_cache config_cache; /*< used by the hw hook layer to do the caching */ + bool dirty[MAX_PIPE]; /*< indicate this object is updated or not */ + struct config_cache config_cache[MAX_PIPE]; /*< used by the hw hook layer to do the caching */ - struct { - enum color_transfer_func tf; - enum cm_type cm_gamma_type; - struct fixed31_32 x_scale; - struct fixed31_32 y_scale; - struct fixed31_32 y_bias; - } cache_info; + struct cache_info cache_info[MAX_PIPE]; }; enum color_white_point_type { @@ -237,12 +239,12 @@ struct vpe_3dlut { union vpe_3dlut_state state; // the followings are for optimization: skip if no change - bool dirty; /*< indicate this object is updated or not */ - struct config_cache config_cache; /*< used by the hw hook layer to do the caching */ + bool dirty[MAX_3DLUT]; /*< indicate this object is updated or not */ + struct config_cache config_cache[MAX_3DLUT]; /*< used by the hw hook layer to do the caching */ struct { uint64_t uid_3dlut; /*< UID for current 3D LUT params */ - } cache_info; + } cache_info[MAX_3DLUT]; }; enum vpe_status vpe_color_update_color_space_and_tf( diff --git a/src/amd/vpelib/src/core/inc/config_cache.h b/src/amd/vpelib/src/core/inc/config_cache.h index 5ca6c389311..489d3f3c80c 100644 --- a/src/amd/vpelib/src/core/inc/config_cache.h +++ b/src/amd/vpelib/src/core/inc/config_cache.h @@ -73,7 +73,6 @@ struct config_cache { bool cached; }; - /* A macro that helps cache the config packet, it won't cache if it is in bypass mode * as bypass mode is not heavy lifting programming. 
* @@ -82,26 +81,28 @@ struct config_cache { * /param disable_cache a flag that controls a caching is needed * /param is_bypass if it is in bypass, it doesn't cache the bypass config * /param program_func_call the program call that generate config packet content + * /param inst index to address the config_cache array */ -#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call) \ +#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call, inst) \ { \ bool use_cache = false; \ \ /* make sure it opens a new config packet */ \ config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \ \ - if ((obj_cache) && !disable_cache && (obj_cache)->config_cache.p_buffer && \ - (obj_cache)->config_cache.cached && !((obj_cache)->dirty) && !is_bypass) { \ + if ((obj_cache) && !disable_cache && (obj_cache)->config_cache[inst].p_buffer && \ + (obj_cache)->config_cache[inst].cached && !((obj_cache)->dirty[inst]) && !is_bypass) { \ /* reuse the cache */ \ - if (config_writer->buf->size >= (obj_cache)->config_cache.size) { \ + if (config_writer->buf->size >= (obj_cache)->config_cache[inst].size) { \ memcpy((void *)(uintptr_t)config_writer->base_cpu_va, \ - (obj_cache)->config_cache.p_buffer, \ - (size_t)(obj_cache)->config_cache.size); \ + (obj_cache)->config_cache[inst].p_buffer, \ + (size_t)(obj_cache)->config_cache[inst].size); \ config_writer->buf->cpu_va = \ - config_writer->base_cpu_va + (obj_cache)->config_cache.size; \ + config_writer->base_cpu_va + (obj_cache)->config_cache[inst].size; \ config_writer->buf->gpu_va = \ - config_writer->base_gpu_va + (obj_cache)->config_cache.size; \ - config_writer->buf->size -= ((obj_cache)->config_cache.size - sizeof(uint32_t)); \ + config_writer->base_gpu_va + (obj_cache)->config_cache[inst].size; \ + config_writer->buf->size -= \ + ((obj_cache)->config_cache[inst].size - sizeof(uint32_t)); \ use_cache = true; \ } \ } \ @@ -117,21 +118,21 @@ struct config_cache { 
if (!disable_cache && !is_bypass) { \ /* only cache when it is not crossing config packets */ \ if (config_num == (obj_cfg_array)->num_configs) { \ - if ((obj_cache)->dirty) { \ + if ((obj_cache)->dirty[inst]) { \ uint64_t size = end - start; \ \ - if ((obj_cache)->config_cache.size < size) { \ - if ((obj_cache)->config_cache.p_buffer) \ - vpe_free((obj_cache)->config_cache.p_buffer); \ + if ((obj_cache)->config_cache[inst].size < size) { \ + if ((obj_cache)->config_cache[inst].p_buffer) \ + vpe_free((obj_cache)->config_cache[inst].p_buffer); \ \ - (obj_cache)->config_cache.p_buffer = vpe_zalloc((size_t)size); \ - if ((obj_cache)->config_cache.p_buffer) { \ - memcpy((obj_cache)->config_cache.p_buffer, \ + (obj_cache)->config_cache[inst].p_buffer = vpe_zalloc((size_t)size); \ + if ((obj_cache)->config_cache[inst].p_buffer) { \ + memcpy((obj_cache)->config_cache[inst].p_buffer, \ (void *)(uintptr_t)start, (size_t)size); \ - (obj_cache)->config_cache.size = size; \ - (obj_cache)->config_cache.cached = true; \ + (obj_cache)->config_cache[inst].size = size; \ + (obj_cache)->config_cache[inst].cached = true; \ } else { \ - (obj_cache)->config_cache.size = 0; \ + (obj_cache)->config_cache[inst].size = 0; \ } \ } \ } \ @@ -139,7 +140,7 @@ struct config_cache { } \ } \ if ((obj_cache)) \ - (obj_cache)->dirty = false; \ + (obj_cache)->dirty[inst] = false; \ } /* the following macro requires a local variable vpr_priv to be present */ diff --git a/src/amd/vpelib/src/core/inc/dpp.h b/src/amd/vpelib/src/core/inc/dpp.h index 5edf1a15f1e..97cbb302599 100644 --- a/src/amd/vpelib/src/core/inc/dpp.h +++ b/src/amd/vpelib/src/core/inc/dpp.h @@ -99,6 +99,7 @@ struct dpp_funcs { struct dpp { struct vpe_priv *vpe_priv; struct dpp_funcs *funcs; + unsigned int inst; struct pwl_params degamma_params; }; diff --git a/src/amd/vpelib/src/core/inc/hw_shared.h b/src/amd/vpelib/src/core/inc/hw_shared.h index 464358dd7ec..7df534ee471 100644 --- a/src/amd/vpelib/src/core/inc/hw_shared.h +++ 
b/src/amd/vpelib/src/core/inc/hw_shared.h @@ -26,6 +26,11 @@ #include "fixed31_32.h" +#define MAX_3DLUT 1 + +#define MAX_PIPE 2 +#define MAX_OUTPUT_PIPE 1 + #ifdef __cplusplus extern "C" { #endif diff --git a/src/amd/vpelib/src/core/inc/mpc.h b/src/amd/vpelib/src/core/inc/mpc.h index 5663dd36d6f..d9dd0aa7cf0 100644 --- a/src/amd/vpelib/src/core/inc/mpc.h +++ b/src/amd/vpelib/src/core/inc/mpc.h @@ -171,6 +171,7 @@ struct mpc_funcs { struct mpc { struct vpe_priv *vpe_priv; struct mpc_funcs *funcs; + unsigned int inst; struct pwl_params regamma_params; struct pwl_params blender_params; struct pwl_params shaper_params; diff --git a/src/amd/vpelib/src/core/inc/opp.h b/src/amd/vpelib/src/core/inc/opp.h index c202228f948..0180fa53d68 100644 --- a/src/amd/vpelib/src/core/inc/opp.h +++ b/src/amd/vpelib/src/core/inc/opp.h @@ -121,6 +121,7 @@ struct opp_funcs { struct opp { struct vpe_priv *vpe_priv; struct opp_funcs *funcs; + unsigned int inst; }; #ifdef __cplusplus diff --git a/src/amd/vpelib/src/core/inc/resource.h b/src/amd/vpelib/src/core/inc/resource.h index 99d1319b063..57d63516b75 100644 --- a/src/amd/vpelib/src/core/inc/resource.h +++ b/src/amd/vpelib/src/core/inc/resource.h @@ -32,6 +32,7 @@ #include "mpc.h" #include "opp.h" #include "vector.h" +#include "hw_shared.h" #ifdef __cplusplus extern "C" { @@ -41,8 +42,6 @@ struct vpe_priv; struct vpe_cmd_info; struct segment_ctx; -#define MAX_PIPE 2 -#define MAX_OUTPUT_PIPE 2 #define MIN_VPE_CMD 1024 enum vpe_cmd_ops; diff --git a/src/amd/vpelib/src/core/resource.c b/src/amd/vpelib/src/core/resource.c index afb5def0af7..12c769ff7da 100644 --- a/src/amd/vpelib/src/core/resource.c +++ b/src/amd/vpelib/src/core/resource.c @@ -199,7 +199,8 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) for (i = 0; i < vpe_priv->num_streams; i++) { ctx = &vpe_priv->stream_ctx[i]; if (ctx->input_tf) { - CONFIG_CACHE_FREE(ctx->input_tf->config_cache); + for (int j = 0; j < MAX_PIPE; j++) + 
CONFIG_CACHE_FREE(ctx->input_tf->config_cache[j]); vpe_free(ctx->input_tf); ctx->input_tf = NULL; } @@ -220,19 +221,22 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) } if (ctx->in_shaper_func) { - CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache); + for (int j = 0; j < MAX_PIPE; j++) + CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache[j]); vpe_free(ctx->in_shaper_func); ctx->in_shaper_func = NULL; } if (ctx->blend_tf) { - CONFIG_CACHE_FREE(ctx->blend_tf->config_cache); + for (int j = 0; j < MAX_PIPE; j++) + CONFIG_CACHE_FREE(ctx->blend_tf->config_cache[j]); vpe_free(ctx->blend_tf); ctx->blend_tf = NULL; } if (ctx->lut3d_func) { - CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache); + for (int j = 0; j < MAX_3DLUT; j++) + CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache[j]); vpe_free(ctx->lut3d_func); ctx->lut3d_func = NULL; } @@ -264,6 +268,7 @@ void vpe_pipe_reset(struct vpe_priv *vpe_priv) for (i = 0; i < vpe_priv->num_pipe; i++) { pipe_ctx = &vpe_priv->pipe_ctx[i]; + pipe_ctx->pipe_idx = i; pipe_ctx->is_top_pipe = true; pipe_ctx->owner = PIPE_CTX_NO_OWNER; pipe_ctx->top_pipe_idx = 0xff;