From ded1a2b3f0e7e45cce609e973b131833dece3710 Mon Sep 17 00:00:00 2001
From: Brendan <brendanSteve.leder@amd.com>
Date: Thu, 27 Jun 2024 14:56:54 -0400
Subject: [PATCH] amd/vpelib: Multiple instance support in caching framework

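Generalize the config caching framework to support multiple hardware
instances: each cached object now keeps its dirty flag, config cache and
cache info per instance, and CONFIG_CACHE takes an instance index.
Also change some static DPP scaler and gamma-correction helpers to
public functions to maximize function re-use possibilities.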

Reviewed-by: Roy Chan <Roy.Chan@amd.com>
Acked-by: Chih-Wei Chien <Chih-Wei.Chien@amd.com>
Signed-off-by: Brendan <brendanSteve.leder@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31605>
---
 src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h |  63 ++++++++++
 .../vpelib/src/chip/vpe10/vpe10_cm_common.c   |   4 +-
 src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c  |   4 +-
 .../vpelib/src/chip/vpe10/vpe10_dpp_dscl.c    |  94 +++++----------
 src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c     |   8 +-
 src/amd/vpelib/src/core/color.c               | 113 +++++++++++-------
 src/amd/vpelib/src/core/inc/cdc.h             |   1 +
 src/amd/vpelib/src/core/inc/color.h           |  26 ++--
 src/amd/vpelib/src/core/inc/config_cache.h    |  43 +++----
 src/amd/vpelib/src/core/inc/dpp.h             |   1 +
 src/amd/vpelib/src/core/inc/hw_shared.h       |   5 +
 src/amd/vpelib/src/core/inc/mpc.h             |   1 +
 src/amd/vpelib/src/core/inc/opp.h             |   1 +
 src/amd/vpelib/src/core/inc/resource.h        |   3 +-
 src/amd/vpelib/src/core/resource.c            |  13 +-
 15 files changed, 222 insertions(+), 158 deletions(-)

diff --git a/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h b/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h
index 164eb633cb4..65ae0375ca9 100644
--- a/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h
+++ b/src/amd/vpelib/src/chip/vpe10/inc/vpe10_dpp.h
@@ -887,6 +887,69 @@ void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl
 
 void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data);
 
+/* Scaler helper functions */
+enum vpe10_coef_filter_type_sel {
+    SCL_COEF_LUMA_VERT_FILTER   = 0,
+    SCL_COEF_LUMA_HORZ_FILTER   = 1,
+    SCL_COEF_CHROMA_VERT_FILTER = 2,
+    SCL_COEF_CHROMA_HORZ_FILTER = 3,
+    SCL_COEF_ALPHA_VERT_FILTER  = 4,
+    SCL_COEF_ALPHA_HORZ_FILTER  = 5,
+};
+
+enum vpe10_dscl_autocal_mode {
+    AUTOCAL_MODE_OFF = 0,
+
+    /* Autocal calculates the scaling ratio and initial phase;
+     * DSCL_MODE_SEL must be set to 1
+     */
+    AUTOCAL_MODE_AUTOSCALE = 1,
+    /* Autocal performs auto centering without replication;
+     * DSCL_MODE_SEL must be set to 0
+     */
+    AUTOCAL_MODE_AUTOCENTER = 2,
+    /* Autocal performs auto centering and auto replication;
+     * DSCL_MODE_SEL must be set to 0
+     */
+    AUTOCAL_MODE_AUTOREPLICATE = 3
+};
+
+enum vpe10_dscl_mode_sel {
+    DSCL_MODE_SCALING_444_BYPASS        = 0,
+    DSCL_MODE_SCALING_444_RGB_ENABLE    = 1,
+    DSCL_MODE_SCALING_444_YCBCR_ENABLE  = 2,
+    DSCL_MODE_SCALING_420_YCBCR_ENABLE  = 3,
+    DSCL_MODE_SCALING_420_LUMA_BYPASS   = 4,
+    DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5,
+    DSCL_MODE_DSCL_BYPASS               = 6
+};
+void vpe10_dpp_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end);
+
+void vpe10_dpp_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end);
+
+void vpe10_dpp_power_on_dscl(struct dpp *dpp, bool power_on);
+
+void vpe10_dpp_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
+    enum lb_memory_config mem_size_config);
+
+void vpe10_dpp_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data);
+
+void vpe10_dpp_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data);
+
+void vpe10_dpp_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
+    enum vpe10_dscl_mode_sel scl_mode, bool chroma_coef_mode);
+
+void vpe10_dpp_dscl_set_dscl_mode(struct dpp *dpp, enum vpe10_dscl_mode_sel dscl_mode);
+
+enum vpe10_dscl_mode_sel vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data *data);
+
+void vpe10_dpp_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
+    enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter);
+
+bool vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format);
+
+void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params);
+
 uint32_t vpe10_get_line_buffer_size(void);
 
 bool vpe10_dpp_validate_number_of_taps(struct dpp *dpp, struct scaler_data *scl_data);
diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c b/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c
index 349de86da03..7ade007b378 100644
--- a/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c
+++ b/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c
@@ -233,7 +233,7 @@ bool vpe10_cm_helper_translate_curve_to_hw_format(
     uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
 
     if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS ||
-        !output_tf->dirty)
+        (!output_tf->dirty && (lut_params->hw_points_num != 0)))
         return false;
 
     corner_points = lut_params->corner_points;
@@ -419,7 +419,7 @@ bool vpe10_cm_helper_translate_curve_to_degamma_hw_format(
     uint32_t k, seg_distr[MAX_REGIONS_NUMBER_DEGAMMA], num_segments, hw_points;
 
     if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS ||
-        !output_tf->dirty)
+        (!output_tf->dirty && (lut_params->hw_points_num != 0)))
         return false;
 
     corner_points = lut_params->corner_points;
diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c
index 053f5ef006d..14b3bbf338e 100644
--- a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c
+++ b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_cm.c
@@ -132,7 +132,7 @@ static void vpe10_dpp_program_gammcor_lut(
     }
 }
 
-static void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params)
+void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params)
 {
     struct vpe10_xfer_func_reg gam_regs = {0};
 
@@ -204,7 +204,7 @@ void vpe10_dpp_program_input_transfer_func(struct dpp *dpp, struct transfer_func
     bypass = ((input_tf->type == TF_TYPE_BYPASS) || dpp->vpe_priv->init.debug.bypass_gamcor);
 
     CONFIG_CACHE(input_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
-        vpe10_dpp_program_gamcor_lut(dpp, params));
+        vpe10_dpp_program_gamcor_lut(dpp, params), dpp->inst);
 }
 
 void vpe10_dpp_program_gamut_remap(struct dpp *dpp, struct colorspace_transform *gamut_remap)
diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c
index efd575346d5..2330d46a286 100644
--- a/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c
+++ b/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c
@@ -34,43 +34,7 @@
 
 #define LB_MAX_PARTITION 12
 
-enum vpe10_coef_filter_type_sel {
-    SCL_COEF_LUMA_VERT_FILTER   = 0,
-    SCL_COEF_LUMA_HORZ_FILTER   = 1,
-    SCL_COEF_CHROMA_VERT_FILTER = 2,
-    SCL_COEF_CHROMA_HORZ_FILTER = 3,
-    SCL_COEF_ALPHA_VERT_FILTER  = 4,
-    SCL_COEF_ALPHA_HORZ_FILTER  = 5
-};
-
-enum dscl_autocal_mode {
-    AUTOCAL_MODE_OFF = 0,
-
-    /* Autocal calculate the scaling ratio and initial phase and the
-     * DSCL_MODE_SEL must be set to 1
-     */
-    AUTOCAL_MODE_AUTOSCALE = 1,
-    /* Autocal perform auto centering without replication and the
-     * DSCL_MODE_SEL must be set to 0
-     */
-    AUTOCAL_MODE_AUTOCENTER = 2,
-    /* Autocal perform auto centering and auto replication and the
-     * DSCL_MODE_SEL must be set to 0
-     */
-    AUTOCAL_MODE_AUTOREPLICATE = 3
-};
-
-enum dscl_mode_sel {
-    DSCL_MODE_SCALING_444_BYPASS        = 0,
-    DSCL_MODE_SCALING_444_RGB_ENABLE    = 1,
-    DSCL_MODE_SCALING_444_YCBCR_ENABLE  = 2,
-    DSCL_MODE_SCALING_420_YCBCR_ENABLE  = 3,
-    DSCL_MODE_SCALING_420_LUMA_BYPASS   = 4,
-    DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5,
-    DSCL_MODE_DSCL_BYPASS               = 6
-};
-
-static bool dpp1_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)
+bool vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)
 {
     return format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
            format <= VPE_SURFACE_PIXEL_FORMAT_VIDEO_END;
@@ -82,7 +46,7 @@ static bool dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format fo
             format <= VPE_SURFACE_PIXEL_FORMAT_SUBSAMPLE_END);
 }
 
-static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data)
+enum vpe10_dscl_mode_sel vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data *data)
 {
 
     // TODO Check if bypass bit enabled
@@ -92,7 +56,7 @@ static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data
         data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
         return DSCL_MODE_DSCL_BYPASS;
 
-    if (!dpp1_dscl_is_ycbcr(data->format))
+    if (!vpe10_dpp_dscl_is_ycbcr(data->format))
         return DSCL_MODE_SCALING_444_RGB_ENABLE;
 
     if (!dpp1_dscl_is_video_subsampled(data->format))
@@ -104,7 +68,7 @@ static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data
     return DSCL_MODE_SCALING_420_YCBCR_ENABLE;
 }
 
-static void dpp1_dscl_set_dscl_mode(struct dpp *dpp, enum dscl_mode_sel dscl_mode)
+void vpe10_dpp_dscl_set_dscl_mode(struct dpp *dpp, enum vpe10_dscl_mode_sel dscl_mode)
 {
 
     PROGRAM_ENTRY();
@@ -130,21 +94,21 @@ static void dpp1_dscl_set_mpc_size(struct dpp *dpp, const struct scaler_data *sc
     REG_SET_2(VPMPC_SIZE, 0, VPMPC_WIDTH, scl_data->h_active, VPMPC_HEIGHT, scl_data->v_active);
 }
 
-static void dpp1_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end)
+void vpe10_dpp_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end)
 {
 
     PROGRAM_ENTRY();
     REG_SET_2(VPOTG_H_BLANK, 0, OTG_H_BLANK_END, end, OTG_H_BLANK_START, start);
 }
 
-static void dpp1_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end)
+void vpe10_dpp_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end)
 {
 
     PROGRAM_ENTRY();
     REG_SET_2(VPOTG_V_BLANK, 0, OTG_V_BLANK_END, end, OTG_V_BLANK_START, start);
 }
 
-static void dpp1_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data)
+void vpe10_dpp_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data)
 {
 
     PROGRAM_ENTRY();
@@ -172,7 +136,7 @@ static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_
     }
 }
 
-static void dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
+void vpe10_dpp_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
     enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter)
 {
     const int tap_pairs = (taps + 1) / 2;
@@ -206,8 +170,8 @@ static void dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
     }
 }
 
-static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
-    enum dscl_mode_sel scl_mode, bool chroma_coef_mode)
+void vpe10_dpp_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
+    enum vpe10_dscl_mode_sel scl_mode, bool chroma_coef_mode)
 {
 
     const uint16_t *filter_h   = NULL;
@@ -228,11 +192,11 @@ static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *
         filter_v = (const uint16_t *)&scl_data->polyphase_filter_coeffs->vert_polyphase_coeffs;
     }
     if (filter_h != NULL)
-        dpp1_dscl_set_scaler_filter(
+        vpe10_dpp_dscl_set_scaler_filter(
             dpp, scl_data->taps.h_taps, SCL_COEF_LUMA_HORZ_FILTER, filter_h);
 
     if (filter_v != NULL)
-        dpp1_dscl_set_scaler_filter(
+        vpe10_dpp_dscl_set_scaler_filter(
             dpp, scl_data->taps.v_taps, SCL_COEF_LUMA_VERT_FILTER, filter_v);
 
     if (chroma_coef_mode) {
@@ -243,18 +207,18 @@ static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *
             dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps_c, scl_data->ratios.vert_c);
 
         if (filter_h_c != NULL)
-            dpp1_dscl_set_scaler_filter(
+            vpe10_dpp_dscl_set_scaler_filter(
                 dpp, scl_data->taps.h_taps_c, SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c);
 
         if (filter_v_c != NULL)
-            dpp1_dscl_set_scaler_filter(
+            vpe10_dpp_dscl_set_scaler_filter(
                 dpp, scl_data->taps.v_taps_c, SCL_COEF_CHROMA_VERT_FILTER, filter_v_c);
     }
 
     REG_UPDATE(VPDSCL_MODE, SCL_CHROMA_COEF_MODE, chroma_coef_mode);
 }
 
-static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
+void vpe10_dpp_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
     enum lb_memory_config mem_size_config)
 {
 
@@ -266,7 +230,7 @@ static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *l
         VPLB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, LB_MAX_PARTITIONS, LB_MAX_PARTITION);
 }
 
-static void dpp1_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data)
+void vpe10_dpp_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data)
 {
 
     PROGRAM_ENTRY();
@@ -313,7 +277,7 @@ static void dpp1_dscl_set_scaler_position(struct dpp *dpp, const struct scaler_d
         VPDSCL_VERT_FILTER_INIT_C, 0, SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int);
 }
 
-static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on)
+void vpe10_dpp_power_on_dscl(struct dpp *dpp, bool power_on)
 {
     PROGRAM_ENTRY();
 
@@ -346,7 +310,7 @@ static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on)
 void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
 {
 
-    enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
+    enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data);
 
     dpp1_dscl_set_recout(dpp, &scl_data->recout);
     dpp1_dscl_set_mpc_size(dpp, scl_data);
@@ -360,24 +324,24 @@ void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl
 void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
 {
 
-    enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
-    bool               ycbcr     = dpp1_dscl_is_ycbcr(scl_data->format);
+    enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data);
+    bool                     ycbcr     = vpe10_dpp_dscl_is_ycbcr(scl_data->format);
 
-    dpp1_dscl_set_h_blank(dpp, 1, 0);
-    dpp1_dscl_set_v_blank(dpp, 1, 0);
+    vpe10_dpp_dscl_set_h_blank(dpp, 1, 0);
+    vpe10_dpp_dscl_set_v_blank(dpp, 1, 0);
 
     if (dscl_mode != DSCL_MODE_DSCL_BYPASS)
-        dpp1_power_on_dscl(dpp, true);
+        vpe10_dpp_power_on_dscl(dpp, true);
 
-    dpp1_dscl_set_dscl_mode(dpp, dscl_mode);
+    vpe10_dpp_dscl_set_dscl_mode(dpp, dscl_mode);
 
     if (dscl_mode == DSCL_MODE_DSCL_BYPASS) {
-        dpp1_power_on_dscl(dpp, false);
+        vpe10_dpp_power_on_dscl(dpp, false);
         return;
     }
 
-    dpp1_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0);
-    dpp1_dscl_set_scale_ratio(dpp, scl_data);
-    dpp1_dscl_set_taps(dpp, scl_data);
-    dpp1_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr);
+    vpe10_dpp_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0);
+    vpe10_dpp_dscl_set_scale_ratio(dpp, scl_data);
+    vpe10_dpp_dscl_set_taps(dpp, scl_data);
+    vpe10_dpp_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr);
 }
diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c b/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c
index 31b9d2d19ce..d419b26eaa2 100644
--- a/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c
+++ b/src/amd/vpelib/src/chip/vpe10/vpe10_mpc.c
@@ -1263,12 +1263,12 @@ void vpe10_mpc_set_mpc_shaper_3dlut(
 
     bypass = (!shaper_lut || (func_shaper && func_shaper->type == TF_TYPE_BYPASS));
     CONFIG_CACHE(func_shaper, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
-        mpc->funcs->program_shaper(mpc, shaper_lut));
+        mpc->funcs->program_shaper(mpc, shaper_lut), mpc->inst);
 
     bypass       = (!lut3d_func || !lut3d_func->state.bits.initialized);
     lut3d_params = (bypass) ? (NULL) : (&lut3d_func->lut_3d);
     CONFIG_CACHE(lut3d_func, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
-        mpc->funcs->program_3dlut(mpc, lut3d_params));
+        mpc->funcs->program_3dlut(mpc, lut3d_params), mpc->inst);
 
     return;
 }
@@ -1298,7 +1298,7 @@ void vpe10_mpc_set_output_transfer_func(struct mpc *mpc, struct output_ctx *outp
               vpe_priv->init.debug.cm_in_bypass || vpe_priv->init.debug.bypass_ogam);
 
     CONFIG_CACHE(output_ctx->output_tf, output_ctx, vpe_priv->init.debug.disable_lut_caching,
-        bypass, mpc->funcs->set_output_gamma(mpc, params));
+        bypass, mpc->funcs->set_output_gamma(mpc, params), mpc->inst);
 }
 
 void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf)
@@ -1328,7 +1328,7 @@ void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf)
         ((!blend_tf) || (blend_tf->type == TF_TYPE_BYPASS) || vpe_priv->init.debug.bypass_blndgam);
 
     CONFIG_CACHE(blend_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
-        mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type));
+        mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type), mpc->inst);
 }
 
 bool vpe10_mpc_program_movable_cm(struct mpc *mpc, struct transfer_func *func_shaper,
diff --git a/src/amd/vpelib/src/core/color.c b/src/amd/vpelib/src/core/color.c
index 3b5ea03a507..3e385b6858c 100644
--- a/src/amd/vpelib/src/core/color.c
+++ b/src/amd/vpelib/src/core/color.c
@@ -181,29 +181,33 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv,
         break;
     }
 
-    if (vpe_priv->init.debug.disable_lut_caching ||
-        (output_tf->cache_info.cm_gamma_type != output_tf->cm_gamma_type) ||
-        (output_tf->cache_info.tf != output_tf->tf) ||
-        (output_tf->cache_info.x_scale.value != x_scale.value) ||
-        (output_tf->cache_info.y_scale.value != y_scale.value) ||
-        (output_tf->cache_info.y_bias.value != y_bias.value)) {
-        // if gamma points have been previously generated,
-        // skip the re-gen no matter it was config cached or not
-        update = true;
+    for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
+        if (vpe_priv->init.debug.disable_lut_caching ||
+            (output_tf->cache_info[i].cm_gamma_type != output_tf->cm_gamma_type) ||
+            (output_tf->cache_info[i].tf != output_tf->tf) ||
+            (output_tf->cache_info[i].x_scale.value != x_scale.value) ||
+            (output_tf->cache_info[i].y_scale.value != y_scale.value) ||
+            (output_tf->cache_info[i].y_bias.value != y_bias.value)) {
+            // regenerate when caching is disabled or the parameters changed; otherwise
+            // skip the re-gen whether or not the points were config cached
+            update = true;
+        }
     }
 
     if (update) {
         ret = vpe_color_calculate_regamma_params(
             vpe_priv, x_scale, y_scale, &vpe_priv->cal_buffer, output_tf);
         if (ret) {
-            // reset the cache status and mark as dirty to let hw layer to re-cache
-            output_tf->dirty                    = true;
-            output_tf->config_cache.cached      = false;
-            output_tf->cache_info.cm_gamma_type = output_tf->cm_gamma_type;
-            output_tf->cache_info.tf            = output_tf->tf;
-            output_tf->cache_info.x_scale       = x_scale;
-            output_tf->cache_info.y_scale       = y_scale;
-            output_tf->cache_info.y_bias        = y_bias;
+            for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
+                // reset the cache status and mark as dirty to let hw layer to re-cache
+                output_tf->dirty[i]                    = true;
+                output_tf->config_cache[i].cached      = false;
+                output_tf->cache_info[i].cm_gamma_type = output_tf->cm_gamma_type;
+                output_tf->cache_info[i].tf            = output_tf->tf;
+                output_tf->cache_info[i].x_scale       = x_scale;
+                output_tf->cache_info[i].y_scale       = y_scale;
+                output_tf->cache_info[i].y_bias        = y_bias;
+            }
         }
     }
     return ret;
@@ -240,28 +244,32 @@ static bool color_update_degamma_tf(struct vpe_priv *vpe_priv,
         break;
     }
 
-    if (vpe_priv->init.debug.disable_lut_caching ||
-        (input_tf->cache_info.cm_gamma_type != input_tf->cm_gamma_type) ||
-        (input_tf->cache_info.tf != input_tf->tf) ||
-        (input_tf->cache_info.x_scale.value != x_scale.value) ||
-        (input_tf->cache_info.y_scale.value != y_scale.value) ||
-        (input_tf->cache_info.y_bias.value != y_bias.value)) {
-        // if gamma points have been previously generated,
-        // skip the re-gen no matter it was config cached or not
-        update = true;
+    for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
+        if (vpe_priv->init.debug.disable_lut_caching ||
+            (input_tf->cache_info[i].cm_gamma_type != input_tf->cm_gamma_type) ||
+            (input_tf->cache_info[i].tf != input_tf->tf) ||
+            (input_tf->cache_info[i].x_scale.value != x_scale.value) ||
+            (input_tf->cache_info[i].y_scale.value != y_scale.value) ||
+            (input_tf->cache_info[i].y_bias.value != y_bias.value)) {
+            // regenerate when caching is disabled or the parameters changed; otherwise
+            // skip the re-gen whether or not the points were config cached
+            update = true;
+        }
     }
 
     if (update) {
         ret = vpe_color_calculate_degamma_params(vpe_priv, x_scale, y_scale, input_tf);
         if (ret) {
-            // reset the cache status and mark as dirty to let hw layer to re-cache
-            input_tf->dirty                    = true;
-            input_tf->config_cache.cached      = false;
-            input_tf->cache_info.cm_gamma_type = input_tf->cm_gamma_type;
-            input_tf->cache_info.tf            = color_input_tf;
-            input_tf->cache_info.x_scale       = x_scale;
-            input_tf->cache_info.y_scale       = y_scale;
-            input_tf->cache_info.y_bias        = y_bias;
+            for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
+                // reset the cache status and mark as dirty to let hw layer to re-cache
+                input_tf->dirty[i]                    = true;
+                input_tf->config_cache[i].cached      = false;
+                input_tf->cache_info[i].cm_gamma_type = input_tf->cm_gamma_type;
+                input_tf->cache_info[i].tf            = color_input_tf;
+                input_tf->cache_info[i].x_scale       = x_scale;
+                input_tf->cache_info[i].y_scale       = y_scale;
+                input_tf->cache_info[i].y_bias        = y_bias;
+            }
         }
     }
     return ret;
@@ -673,13 +681,22 @@ enum vpe_status vpe_color_update_3dlut(
     if (!enable_3dlut) {
         stream_ctx->lut3d_func->state.bits.initialized = 0;
     } else {
-        if (vpe_priv->init.debug.disable_lut_caching ||
-            (stream_ctx->lut3d_func->cache_info.uid_3dlut != stream_ctx->stream.tm_params.UID)) {
+        bool update = false;
+
+        for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++)
+            if (vpe_priv->init.debug.disable_lut_caching ||
+                (stream_ctx->lut3d_func->cache_info[i].uid_3dlut !=
+                    stream_ctx->stream.tm_params.UID))
+                update = true;
+
+        if (update) {
             vpe_convert_to_tetrahedral(
                 vpe_priv, stream_ctx->stream.tm_params.lut_data, stream_ctx->lut3d_func);
-            stream_ctx->lut3d_func->dirty                = true;
-            stream_ctx->lut3d_func->config_cache.cached  = false;
-            stream_ctx->lut3d_func->cache_info.uid_3dlut = stream_ctx->stream.tm_params.UID;
+            for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) {
+                stream_ctx->lut3d_func->dirty[i]                = true;
+                stream_ctx->lut3d_func->config_cache[i].cached  = false;
+                stream_ctx->lut3d_func->cache_info[i].uid_3dlut = stream_ctx->stream.tm_params.UID;
+            }
         }
         stream_ctx->lut3d_func->state.bits.initialized = 1;
     }
@@ -812,10 +829,12 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_
     }
 
     // right now shaper is always programmed with linear, once cached, it is always reused.
-    if (vpe_priv->init.debug.disable_lut_caching ||
-        (shaper_func && shaper_func->cache_info.tf != tf)) {
-        // if the caching has the required data cached, skip the update
-        update = true;
+    for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) {
+        if (vpe_priv->init.debug.disable_lut_caching ||
+            (shaper_func && shaper_func->cache_info[i].tf != tf)) {
+            // cached tf does not match (or caching is disabled), so an update is needed
+            update = true;
+        }
     }
 
     shaper_func->type = TF_TYPE_HWPWL;
@@ -829,9 +848,11 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_
 
         ret = vpe_build_shaper(&shaper_in, &shaper_func->pwl);
         if (ret == VPE_STATUS_OK) {
-            shaper_func->dirty               = true;
-            shaper_func->config_cache.cached = false;
-            shaper_func->cache_info.tf       = tf;
+            for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) {
+                shaper_func->dirty[i]               = true;
+                shaper_func->config_cache[i].cached = false;
+                shaper_func->cache_info[i].tf       = tf;
+            }
         }
     }
     return ret;
diff --git a/src/amd/vpelib/src/core/inc/cdc.h b/src/amd/vpelib/src/core/inc/cdc.h
index 7f519325228..154e2b42117 100644
--- a/src/amd/vpelib/src/core/inc/cdc.h
+++ b/src/amd/vpelib/src/core/inc/cdc.h
@@ -61,6 +61,7 @@ struct cdc_funcs {
 struct cdc {
     struct vpe_priv  *vpe_priv;
     struct cdc_funcs *funcs;
+    unsigned int      inst;
 };
 
 #ifdef __cplusplus
diff --git a/src/amd/vpelib/src/core/inc/color.h b/src/amd/vpelib/src/core/inc/color.h
index b2d084a9e39..5640aea293c 100644
--- a/src/amd/vpelib/src/core/inc/color.h
+++ b/src/amd/vpelib/src/core/inc/color.h
@@ -147,6 +147,14 @@ struct transfer_func_distributed_points {
     uint16_t x_point_at_y1_blue;
 };
 
+struct cache_info {
+    enum color_transfer_func tf;
+    enum cm_type             cm_gamma_type;
+    struct fixed31_32        x_scale;
+    struct fixed31_32        y_scale;
+    struct fixed31_32        y_bias;
+};
+
 struct transfer_func {
     enum transfer_func_type  type;
     enum color_transfer_func tf;
@@ -161,16 +169,10 @@ struct transfer_func {
     };
 
     // the followings are for optimization: skip if no change
-    bool                dirty;        /*< indicate this object is updated or not */
-    struct config_cache config_cache; /*< used by the hw hook layer to do the caching */
+    bool                dirty[MAX_PIPE];        /*< indicate this object is updated or not */
+    struct config_cache config_cache[MAX_PIPE]; /*< used by the hw hook layer to do the caching */
 
-    struct {
-        enum color_transfer_func tf;
-        enum cm_type             cm_gamma_type;
-        struct fixed31_32        x_scale;
-        struct fixed31_32        y_scale;
-        struct fixed31_32        y_bias;
-    } cache_info;
+    struct cache_info cache_info[MAX_PIPE];
 };
 
 enum color_white_point_type {
@@ -237,12 +239,12 @@ struct vpe_3dlut {
     union vpe_3dlut_state     state;
 
     // the followings are for optimization: skip if no change
-    bool                dirty;        /*< indicate this object is updated or not */
-    struct config_cache config_cache; /*< used by the hw hook layer to do the caching */
+    bool                dirty[MAX_3DLUT];        /*< indicate this object is updated or not */
+    struct config_cache config_cache[MAX_3DLUT]; /*< used by the hw hook layer to do the caching */
 
     struct {
         uint64_t uid_3dlut; /*< UID for current 3D LUT params */
-    } cache_info;
+    } cache_info[MAX_3DLUT];
 };
 
 enum vpe_status vpe_color_update_color_space_and_tf(
diff --git a/src/amd/vpelib/src/core/inc/config_cache.h b/src/amd/vpelib/src/core/inc/config_cache.h
index 5ca6c389311..489d3f3c80c 100644
--- a/src/amd/vpelib/src/core/inc/config_cache.h
+++ b/src/amd/vpelib/src/core/inc/config_cache.h
@@ -73,7 +73,6 @@ struct config_cache {
     bool     cached;
 };
 
-
 /* A macro that helps cache the config packet, it won't cache if it is in bypass mode
  * as bypass mode is not heavy lifting programming.
  *
@@ -82,26 +81,28 @@ struct config_cache {
  * /param   disable_cache       a flag that controls a caching is needed
  * /param   is_bypass           if it is in bypass, it doesn't cache the bypass config
  * /param   program_func_call   the program call that generate config packet content
+ * /param   inst                index to address the config_cache array
  */
-#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call)        \
+#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call, inst)  \
     {                                                                                              \
         bool use_cache = false;                                                                    \
                                                                                                    \
         /* make sure it opens a new config packet */                                               \
         config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT);                      \
                                                                                                    \
-        if ((obj_cache) && !disable_cache && (obj_cache)->config_cache.p_buffer &&                 \
-            (obj_cache)->config_cache.cached && !((obj_cache)->dirty) && !is_bypass) {             \
+        if ((obj_cache) && !disable_cache && (obj_cache)->config_cache[inst].p_buffer &&           \
+            (obj_cache)->config_cache[inst].cached && !((obj_cache)->dirty[inst]) && !is_bypass) { \
             /* reuse the cache */                                                                  \
-            if (config_writer->buf->size >= (obj_cache)->config_cache.size) {                      \
+            if (config_writer->buf->size >= (obj_cache)->config_cache[inst].size) {                \
                 memcpy((void *)(uintptr_t)config_writer->base_cpu_va,                              \
-                    (obj_cache)->config_cache.p_buffer,                                            \
-                    (size_t)(obj_cache)->config_cache.size);                                       \
+                    (obj_cache)->config_cache[inst].p_buffer,                                      \
+                    (size_t)(obj_cache)->config_cache[inst].size);                                 \
                 config_writer->buf->cpu_va =                                                       \
-                    config_writer->base_cpu_va + (obj_cache)->config_cache.size;                   \
+                    config_writer->base_cpu_va + (obj_cache)->config_cache[inst].size;             \
                 config_writer->buf->gpu_va =                                                       \
-                    config_writer->base_gpu_va + (obj_cache)->config_cache.size;                   \
-                config_writer->buf->size -= ((obj_cache)->config_cache.size - sizeof(uint32_t));   \
+                    config_writer->base_gpu_va + (obj_cache)->config_cache[inst].size;             \
+                config_writer->buf->size -=                                                        \
+                    ((obj_cache)->config_cache[inst].size - sizeof(uint32_t));                     \
                 use_cache = true;                                                                  \
             }                                                                                      \
         }                                                                                          \
@@ -117,21 +118,21 @@ struct config_cache {
             if (!disable_cache && !is_bypass) {                                                    \
                 /* only cache when it is not crossing config packets */                            \
                 if (config_num == (obj_cfg_array)->num_configs) {                                  \
-                    if ((obj_cache)->dirty) {                                                      \
+                    if ((obj_cache)->dirty[inst]) {                                                \
                         uint64_t size = end - start;                                               \
                                                                                                    \
-                        if ((obj_cache)->config_cache.size < size) {                               \
-                            if ((obj_cache)->config_cache.p_buffer)                                \
-                                vpe_free((obj_cache)->config_cache.p_buffer);                      \
+                        if ((obj_cache)->config_cache[inst].size < size) {                         \
+                            if ((obj_cache)->config_cache[inst].p_buffer)                          \
+                                vpe_free((obj_cache)->config_cache[inst].p_buffer);                \
                                                                                                    \
-                            (obj_cache)->config_cache.p_buffer = vpe_zalloc((size_t)size);         \
-                            if ((obj_cache)->config_cache.p_buffer) {                              \
-                                memcpy((obj_cache)->config_cache.p_buffer,                         \
+                            (obj_cache)->config_cache[inst].p_buffer = vpe_zalloc((size_t)size);   \
+                            if ((obj_cache)->config_cache[inst].p_buffer) {                        \
+                                memcpy((obj_cache)->config_cache[inst].p_buffer,                   \
                                     (void *)(uintptr_t)start, (size_t)size);                       \
-                                (obj_cache)->config_cache.size   = size;                           \
-                                (obj_cache)->config_cache.cached = true;                           \
+                                (obj_cache)->config_cache[inst].size   = size;                     \
+                                (obj_cache)->config_cache[inst].cached = true;                     \
                             } else {                                                               \
-                                (obj_cache)->config_cache.size = 0;                                \
+                                (obj_cache)->config_cache[inst].size = 0;                          \
                             }                                                                      \
                         }                                                                          \
                     }                                                                              \
@@ -139,7 +140,7 @@ struct config_cache {
             }                                                                                      \
         }                                                                                          \
         if ((obj_cache))                                                                           \
-            (obj_cache)->dirty = false;                                                            \
+            (obj_cache)->dirty[inst] = false;                                                      \
     }
 
 /* the following macro requires a local variable vpr_priv to be present */
diff --git a/src/amd/vpelib/src/core/inc/dpp.h b/src/amd/vpelib/src/core/inc/dpp.h
index 5edf1a15f1e..97cbb302599 100644
--- a/src/amd/vpelib/src/core/inc/dpp.h
+++ b/src/amd/vpelib/src/core/inc/dpp.h
@@ -99,6 +99,7 @@ struct dpp_funcs {
 struct dpp {
     struct vpe_priv  *vpe_priv;
     struct dpp_funcs *funcs;
+    unsigned int      inst;
 
     struct pwl_params degamma_params;
 };
diff --git a/src/amd/vpelib/src/core/inc/hw_shared.h b/src/amd/vpelib/src/core/inc/hw_shared.h
index 464358dd7ec..7df534ee471 100644
--- a/src/amd/vpelib/src/core/inc/hw_shared.h
+++ b/src/amd/vpelib/src/core/inc/hw_shared.h
@@ -26,6 +26,11 @@
 
 #include "fixed31_32.h"
 
+#define MAX_3DLUT 1
+
+#define MAX_PIPE        2
+#define MAX_OUTPUT_PIPE 1
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/src/amd/vpelib/src/core/inc/mpc.h b/src/amd/vpelib/src/core/inc/mpc.h
index 5663dd36d6f..d9dd0aa7cf0 100644
--- a/src/amd/vpelib/src/core/inc/mpc.h
+++ b/src/amd/vpelib/src/core/inc/mpc.h
@@ -171,6 +171,7 @@ struct mpc_funcs {
 struct mpc {
     struct vpe_priv  *vpe_priv;
     struct mpc_funcs *funcs;
+    unsigned int      inst;
     struct pwl_params regamma_params;
     struct pwl_params blender_params;
     struct pwl_params shaper_params;
diff --git a/src/amd/vpelib/src/core/inc/opp.h b/src/amd/vpelib/src/core/inc/opp.h
index c202228f948..0180fa53d68 100644
--- a/src/amd/vpelib/src/core/inc/opp.h
+++ b/src/amd/vpelib/src/core/inc/opp.h
@@ -121,6 +121,7 @@ struct opp_funcs {
 struct opp {
     struct vpe_priv  *vpe_priv;
     struct opp_funcs *funcs;
+    unsigned int      inst;
 };
 
 #ifdef __cplusplus
diff --git a/src/amd/vpelib/src/core/inc/resource.h b/src/amd/vpelib/src/core/inc/resource.h
index 99d1319b063..57d63516b75 100644
--- a/src/amd/vpelib/src/core/inc/resource.h
+++ b/src/amd/vpelib/src/core/inc/resource.h
@@ -32,6 +32,7 @@
 #include "mpc.h"
 #include "opp.h"
 #include "vector.h"
+#include "hw_shared.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -41,8 +42,6 @@ struct vpe_priv;
 struct vpe_cmd_info;
 struct segment_ctx;
 
-#define MAX_PIPE 2
-#define MAX_OUTPUT_PIPE 2
 #define MIN_VPE_CMD     1024
 
 enum vpe_cmd_ops;
diff --git a/src/amd/vpelib/src/core/resource.c b/src/amd/vpelib/src/core/resource.c
index afb5def0af7..12c769ff7da 100644
--- a/src/amd/vpelib/src/core/resource.c
+++ b/src/amd/vpelib/src/core/resource.c
@@ -199,7 +199,8 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv)
     for (i = 0; i < vpe_priv->num_streams; i++) {
         ctx = &vpe_priv->stream_ctx[i];
         if (ctx->input_tf) {
-            CONFIG_CACHE_FREE(ctx->input_tf->config_cache);
+            for (int j = 0; j < MAX_PIPE; j++)
+                CONFIG_CACHE_FREE(ctx->input_tf->config_cache[j]);
             vpe_free(ctx->input_tf);
             ctx->input_tf = NULL;
         }
@@ -220,19 +221,22 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv)
         }
 
         if (ctx->in_shaper_func) {
-            CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache);
+            for (int j = 0; j < MAX_PIPE; j++)
+                CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache[j]);
             vpe_free(ctx->in_shaper_func);
             ctx->in_shaper_func = NULL;
         }
 
         if (ctx->blend_tf) {
-            CONFIG_CACHE_FREE(ctx->blend_tf->config_cache);
+            for (int j = 0; j < MAX_PIPE; j++)
+                CONFIG_CACHE_FREE(ctx->blend_tf->config_cache[j]);
             vpe_free(ctx->blend_tf);
             ctx->blend_tf = NULL;
         }
 
         if (ctx->lut3d_func) {
-            CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache);
+            for (int j = 0; j < MAX_3DLUT; j++)
+                CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache[j]);
             vpe_free(ctx->lut3d_func);
             ctx->lut3d_func = NULL;
         }
@@ -264,6 +268,7 @@ void vpe_pipe_reset(struct vpe_priv *vpe_priv)
 
     for (i = 0; i < vpe_priv->num_pipe; i++) {
         pipe_ctx               = &vpe_priv->pipe_ctx[i];
+        pipe_ctx->pipe_idx     = i;
         pipe_ctx->is_top_pipe  = true;
         pipe_ctx->owner        = PIPE_CTX_NO_OWNER;
         pipe_ctx->top_pipe_idx = 0xff;