amd/vpelib: Multiple instance support in caching framework

Generalize the caching to work with multiple instances of objects.
Change some static functions to public functions to maximize function
re-use possibilities.

Reviewed-by: Roy Chan <Roy.Chan@amd.com>
Acked-by: Chih-Wei Chien <Chih-Wei.Chien@amd.com>
Signed-off-by: Brendan <brendanSteve.leder@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31605>
This commit is contained in:
Brendan 2024-06-27 14:56:54 -04:00 committed by Marge Bot
parent 7a293a812a
commit ded1a2b3f0
15 changed files with 222 additions and 158 deletions

View file

@ -887,6 +887,69 @@ void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl
void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data);
/* Scaler helper functions */
/* Selects which DSCL coefficient set vpe10_dpp_dscl_set_scaler_filter()
 * programs: the plane (luma/chroma/alpha) and direction (vertical/horizontal)
 * of the filter coefficients.
 */
enum vpe10_coef_filter_type_sel {
SCL_COEF_LUMA_VERT_FILTER = 0,
SCL_COEF_LUMA_HORZ_FILTER = 1,
SCL_COEF_CHROMA_VERT_FILTER = 2,
SCL_COEF_CHROMA_HORZ_FILTER = 3,
SCL_COEF_ALPHA_VERT_FILTER = 4,
SCL_COEF_ALPHA_HORZ_FILTER = 5,
};
/* Auto-calibration modes for the DSCL scaler; each mode constrains the value
 * that must be written to DSCL_MODE_SEL (see the per-constant notes below).
 */
enum vpe10_dscl_autocal_mode {
AUTOCAL_MODE_OFF = 0,
/* Autocal calculate the scaling ratio and initial phase and the
 * DSCL_MODE_SEL must be set to 1
 */
AUTOCAL_MODE_AUTOSCALE = 1,
/* Autocal perform auto centering without replication and the
 * DSCL_MODE_SEL must be set to 0
 */
AUTOCAL_MODE_AUTOCENTER = 2,
/* Autocal perform auto centering and auto replication and the
 * DSCL_MODE_SEL must be set to 0
 */
AUTOCAL_MODE_AUTOREPLICATE = 3
};
/* DSCL scaling mode selection. Computed from the scaler data by
 * vpe10_dpp_dscl_get_dscl_mode() and programmed via
 * vpe10_dpp_dscl_set_dscl_mode(). DSCL_MODE_DSCL_BYPASS indicates the scaler
 * can be powered off entirely.
 */
enum vpe10_dscl_mode_sel {
DSCL_MODE_SCALING_444_BYPASS = 0,
DSCL_MODE_SCALING_444_RGB_ENABLE = 1,
DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2,
DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3,
DSCL_MODE_SCALING_420_LUMA_BYPASS = 4,
DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5,
DSCL_MODE_DSCL_BYPASS = 6
};
void vpe10_dpp_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end);
void vpe10_dpp_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end);
void vpe10_dpp_power_on_dscl(struct dpp *dpp, bool power_on);
void vpe10_dpp_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
enum lb_memory_config mem_size_config);
void vpe10_dpp_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data);
void vpe10_dpp_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data);
void vpe10_dpp_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
enum vpe10_dscl_mode_sel scl_mode, bool chroma_coef_mode);
void vpe10_dpp_dscl_set_dscl_mode(struct dpp *dpp, enum vpe10_dscl_mode_sel dscl_mode);
enum vpe10_dscl_mode_sel vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data *data);
void vpe10_dpp_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter);
bool vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format);
void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params);
uint32_t vpe10_get_line_buffer_size(void);
bool vpe10_dpp_validate_number_of_taps(struct dpp *dpp, struct scaler_data *scl_data);

View file

@ -233,7 +233,7 @@ bool vpe10_cm_helper_translate_curve_to_hw_format(
uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS ||
!output_tf->dirty)
(!output_tf->dirty && (lut_params->hw_points_num != 0)))
return false;
corner_points = lut_params->corner_points;
@ -419,7 +419,7 @@ bool vpe10_cm_helper_translate_curve_to_degamma_hw_format(
uint32_t k, seg_distr[MAX_REGIONS_NUMBER_DEGAMMA], num_segments, hw_points;
if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS ||
!output_tf->dirty)
(!output_tf->dirty && (lut_params->hw_points_num != 0)))
return false;
corner_points = lut_params->corner_points;

View file

@ -132,7 +132,7 @@ static void vpe10_dpp_program_gammcor_lut(
}
}
static void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params)
void vpe10_dpp_program_gamcor_lut(struct dpp *dpp, const struct pwl_params *params)
{
struct vpe10_xfer_func_reg gam_regs = {0};
@ -204,7 +204,7 @@ void vpe10_dpp_program_input_transfer_func(struct dpp *dpp, struct transfer_func
bypass = ((input_tf->type == TF_TYPE_BYPASS) || dpp->vpe_priv->init.debug.bypass_gamcor);
CONFIG_CACHE(input_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
vpe10_dpp_program_gamcor_lut(dpp, params));
vpe10_dpp_program_gamcor_lut(dpp, params), dpp->inst);
}
void vpe10_dpp_program_gamut_remap(struct dpp *dpp, struct colorspace_transform *gamut_remap)

View file

@ -34,43 +34,7 @@
#define LB_MAX_PARTITION 12
enum vpe10_coef_filter_type_sel {
SCL_COEF_LUMA_VERT_FILTER = 0,
SCL_COEF_LUMA_HORZ_FILTER = 1,
SCL_COEF_CHROMA_VERT_FILTER = 2,
SCL_COEF_CHROMA_HORZ_FILTER = 3,
SCL_COEF_ALPHA_VERT_FILTER = 4,
SCL_COEF_ALPHA_HORZ_FILTER = 5
};
enum dscl_autocal_mode {
AUTOCAL_MODE_OFF = 0,
/* Autocal calculate the scaling ratio and initial phase and the
* DSCL_MODE_SEL must be set to 1
*/
AUTOCAL_MODE_AUTOSCALE = 1,
/* Autocal perform auto centering without replication and the
* DSCL_MODE_SEL must be set to 0
*/
AUTOCAL_MODE_AUTOCENTER = 2,
/* Autocal perform auto centering and auto replication and the
* DSCL_MODE_SEL must be set to 0
*/
AUTOCAL_MODE_AUTOREPLICATE = 3
};
enum dscl_mode_sel {
DSCL_MODE_SCALING_444_BYPASS = 0,
DSCL_MODE_SCALING_444_RGB_ENABLE = 1,
DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2,
DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3,
DSCL_MODE_SCALING_420_LUMA_BYPASS = 4,
DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5,
DSCL_MODE_DSCL_BYPASS = 6
};
static bool dpp1_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)
bool vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)
{
return format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
format <= VPE_SURFACE_PIXEL_FORMAT_VIDEO_END;
@ -82,7 +46,7 @@ static bool dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format fo
format <= VPE_SURFACE_PIXEL_FORMAT_SUBSAMPLE_END);
}
static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data)
enum vpe10_dscl_mode_sel vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data *data)
{
// TODO Check if bypass bit enabled
@ -92,7 +56,7 @@ static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data
data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
return DSCL_MODE_DSCL_BYPASS;
if (!dpp1_dscl_is_ycbcr(data->format))
if (!vpe10_dpp_dscl_is_ycbcr(data->format))
return DSCL_MODE_SCALING_444_RGB_ENABLE;
if (!dpp1_dscl_is_video_subsampled(data->format))
@ -104,7 +68,7 @@ static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data
return DSCL_MODE_SCALING_420_YCBCR_ENABLE;
}
static void dpp1_dscl_set_dscl_mode(struct dpp *dpp, enum dscl_mode_sel dscl_mode)
void vpe10_dpp_dscl_set_dscl_mode(struct dpp *dpp, enum vpe10_dscl_mode_sel dscl_mode)
{
PROGRAM_ENTRY();
@ -130,21 +94,21 @@ static void dpp1_dscl_set_mpc_size(struct dpp *dpp, const struct scaler_data *sc
REG_SET_2(VPMPC_SIZE, 0, VPMPC_WIDTH, scl_data->h_active, VPMPC_HEIGHT, scl_data->v_active);
}
static void dpp1_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end)
/* Program the OTG horizontal blank start/end into VPOTG_H_BLANK.
 * Made public (previously static dpp1_dscl_set_h_blank) to allow re-use by
 * other HW generations.
 */
void vpe10_dpp_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end)
{
PROGRAM_ENTRY();
REG_SET_2(VPOTG_H_BLANK, 0, OTG_H_BLANK_END, end, OTG_H_BLANK_START, start);
}
static void dpp1_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end)
/* Program the OTG vertical blank start/end into VPOTG_V_BLANK.
 * Made public (previously static dpp1_dscl_set_v_blank) to allow re-use by
 * other HW generations.
 */
void vpe10_dpp_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end)
{
PROGRAM_ENTRY();
REG_SET_2(VPOTG_V_BLANK, 0, OTG_V_BLANK_END, end, OTG_V_BLANK_START, start);
}
static void dpp1_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data)
void vpe10_dpp_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data)
{
PROGRAM_ENTRY();
@ -172,7 +136,7 @@ static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_
}
}
static void dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
void vpe10_dpp_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter)
{
const int tap_pairs = (taps + 1) / 2;
@ -206,8 +170,8 @@ static void dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
}
}
static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
enum dscl_mode_sel scl_mode, bool chroma_coef_mode)
void vpe10_dpp_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
enum vpe10_dscl_mode_sel scl_mode, bool chroma_coef_mode)
{
const uint16_t *filter_h = NULL;
@ -228,11 +192,11 @@ static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *
filter_v = (const uint16_t *)&scl_data->polyphase_filter_coeffs->vert_polyphase_coeffs;
}
if (filter_h != NULL)
dpp1_dscl_set_scaler_filter(
vpe10_dpp_dscl_set_scaler_filter(
dpp, scl_data->taps.h_taps, SCL_COEF_LUMA_HORZ_FILTER, filter_h);
if (filter_v != NULL)
dpp1_dscl_set_scaler_filter(
vpe10_dpp_dscl_set_scaler_filter(
dpp, scl_data->taps.v_taps, SCL_COEF_LUMA_VERT_FILTER, filter_v);
if (chroma_coef_mode) {
@ -243,18 +207,18 @@ static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *
dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps_c, scl_data->ratios.vert_c);
if (filter_h_c != NULL)
dpp1_dscl_set_scaler_filter(
vpe10_dpp_dscl_set_scaler_filter(
dpp, scl_data->taps.h_taps_c, SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c);
if (filter_v_c != NULL)
dpp1_dscl_set_scaler_filter(
vpe10_dpp_dscl_set_scaler_filter(
dpp, scl_data->taps.v_taps_c, SCL_COEF_CHROMA_VERT_FILTER, filter_v_c);
}
REG_UPDATE(VPDSCL_MODE, SCL_CHROMA_COEF_MODE, chroma_coef_mode);
}
static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
void vpe10_dpp_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
enum lb_memory_config mem_size_config)
{
@ -266,7 +230,7 @@ static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *l
VPLB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, LB_MAX_PARTITIONS, LB_MAX_PARTITION);
}
static void dpp1_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data)
void vpe10_dpp_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data)
{
PROGRAM_ENTRY();
@ -313,7 +277,7 @@ static void dpp1_dscl_set_scaler_position(struct dpp *dpp, const struct scaler_d
VPDSCL_VERT_FILTER_INIT_C, 0, SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int);
}
static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on)
void vpe10_dpp_power_on_dscl(struct dpp *dpp, bool power_on)
{
PROGRAM_ENTRY();
@ -346,7 +310,7 @@ static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on)
void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
{
enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data);
dpp1_dscl_set_recout(dpp, &scl_data->recout);
dpp1_dscl_set_mpc_size(dpp, scl_data);
@ -360,24 +324,24 @@ void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl
void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
{
enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
bool ycbcr = dpp1_dscl_is_ycbcr(scl_data->format);
enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data);
bool ycbcr = vpe10_dpp_dscl_is_ycbcr(scl_data->format);
dpp1_dscl_set_h_blank(dpp, 1, 0);
dpp1_dscl_set_v_blank(dpp, 1, 0);
vpe10_dpp_dscl_set_h_blank(dpp, 1, 0);
vpe10_dpp_dscl_set_v_blank(dpp, 1, 0);
if (dscl_mode != DSCL_MODE_DSCL_BYPASS)
dpp1_power_on_dscl(dpp, true);
vpe10_dpp_power_on_dscl(dpp, true);
dpp1_dscl_set_dscl_mode(dpp, dscl_mode);
vpe10_dpp_dscl_set_dscl_mode(dpp, dscl_mode);
if (dscl_mode == DSCL_MODE_DSCL_BYPASS) {
dpp1_power_on_dscl(dpp, false);
vpe10_dpp_power_on_dscl(dpp, false);
return;
}
dpp1_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0);
dpp1_dscl_set_scale_ratio(dpp, scl_data);
dpp1_dscl_set_taps(dpp, scl_data);
dpp1_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr);
vpe10_dpp_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0);
vpe10_dpp_dscl_set_scale_ratio(dpp, scl_data);
vpe10_dpp_dscl_set_taps(dpp, scl_data);
vpe10_dpp_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr);
}

View file

@ -1263,12 +1263,12 @@ void vpe10_mpc_set_mpc_shaper_3dlut(
bypass = (!shaper_lut || (func_shaper && func_shaper->type == TF_TYPE_BYPASS));
CONFIG_CACHE(func_shaper, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
mpc->funcs->program_shaper(mpc, shaper_lut));
mpc->funcs->program_shaper(mpc, shaper_lut), mpc->inst);
bypass = (!lut3d_func || !lut3d_func->state.bits.initialized);
lut3d_params = (bypass) ? (NULL) : (&lut3d_func->lut_3d);
CONFIG_CACHE(lut3d_func, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
mpc->funcs->program_3dlut(mpc, lut3d_params));
mpc->funcs->program_3dlut(mpc, lut3d_params), mpc->inst);
return;
}
@ -1298,7 +1298,7 @@ void vpe10_mpc_set_output_transfer_func(struct mpc *mpc, struct output_ctx *outp
vpe_priv->init.debug.cm_in_bypass || vpe_priv->init.debug.bypass_ogam);
CONFIG_CACHE(output_ctx->output_tf, output_ctx, vpe_priv->init.debug.disable_lut_caching,
bypass, mpc->funcs->set_output_gamma(mpc, params));
bypass, mpc->funcs->set_output_gamma(mpc, params), mpc->inst);
}
void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf)
@ -1328,7 +1328,7 @@ void vpe10_mpc_set_blend_lut(struct mpc *mpc, struct transfer_func *blend_tf)
((!blend_tf) || (blend_tf->type == TF_TYPE_BYPASS) || vpe_priv->init.debug.bypass_blndgam);
CONFIG_CACHE(blend_tf, stream_ctx, vpe_priv->init.debug.disable_lut_caching, bypass,
mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type));
mpc->funcs->program_1dlut(mpc, blend_lut, gamma_type), mpc->inst);
}
bool vpe10_mpc_program_movable_cm(struct mpc *mpc, struct transfer_func *func_shaper,

View file

@ -181,29 +181,33 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv,
break;
}
if (vpe_priv->init.debug.disable_lut_caching ||
(output_tf->cache_info.cm_gamma_type != output_tf->cm_gamma_type) ||
(output_tf->cache_info.tf != output_tf->tf) ||
(output_tf->cache_info.x_scale.value != x_scale.value) ||
(output_tf->cache_info.y_scale.value != y_scale.value) ||
(output_tf->cache_info.y_bias.value != y_bias.value)) {
// if gamma points have been previously generated,
// skip the re-gen no matter it was config cached or not
update = true;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
if (vpe_priv->init.debug.disable_lut_caching ||
(output_tf->cache_info[i].cm_gamma_type != output_tf->cm_gamma_type) ||
(output_tf->cache_info[i].tf != output_tf->tf) ||
(output_tf->cache_info[i].x_scale.value != x_scale.value) ||
(output_tf->cache_info[i].y_scale.value != y_scale.value) ||
(output_tf->cache_info[i].y_bias.value != y_bias.value)) {
// if gamma points have been previously generated,
// skip the re-gen no matter it was config cached or not
update = true;
}
}
if (update) {
ret = vpe_color_calculate_regamma_params(
vpe_priv, x_scale, y_scale, &vpe_priv->cal_buffer, output_tf);
if (ret) {
// reset the cache status and mark as dirty to let hw layer to re-cache
output_tf->dirty = true;
output_tf->config_cache.cached = false;
output_tf->cache_info.cm_gamma_type = output_tf->cm_gamma_type;
output_tf->cache_info.tf = output_tf->tf;
output_tf->cache_info.x_scale = x_scale;
output_tf->cache_info.y_scale = y_scale;
output_tf->cache_info.y_bias = y_bias;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
// reset the cache status and mark as dirty to let hw layer to re-cache
output_tf->dirty[i] = true;
output_tf->config_cache[i].cached = false;
output_tf->cache_info[i].cm_gamma_type = output_tf->cm_gamma_type;
output_tf->cache_info[i].tf = output_tf->tf;
output_tf->cache_info[i].x_scale = x_scale;
output_tf->cache_info[i].y_scale = y_scale;
output_tf->cache_info[i].y_bias = y_bias;
}
}
}
return ret;
@ -240,28 +244,32 @@ static bool color_update_degamma_tf(struct vpe_priv *vpe_priv,
break;
}
if (vpe_priv->init.debug.disable_lut_caching ||
(input_tf->cache_info.cm_gamma_type != input_tf->cm_gamma_type) ||
(input_tf->cache_info.tf != input_tf->tf) ||
(input_tf->cache_info.x_scale.value != x_scale.value) ||
(input_tf->cache_info.y_scale.value != y_scale.value) ||
(input_tf->cache_info.y_bias.value != y_bias.value)) {
// if gamma points have been previously generated,
// skip the re-gen no matter it was config cached or not
update = true;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
if (vpe_priv->init.debug.disable_lut_caching ||
(input_tf->cache_info[i].cm_gamma_type != input_tf->cm_gamma_type) ||
(input_tf->cache_info[i].tf != input_tf->tf) ||
(input_tf->cache_info[i].x_scale.value != x_scale.value) ||
(input_tf->cache_info[i].y_scale.value != y_scale.value) ||
(input_tf->cache_info[i].y_bias.value != y_bias.value)) {
// if gamma points have been previously generated,
// skip the re-gen no matter it was config cached or not
update = true;
}
}
if (update) {
ret = vpe_color_calculate_degamma_params(vpe_priv, x_scale, y_scale, input_tf);
if (ret) {
// reset the cache status and mark as dirty to let hw layer to re-cache
input_tf->dirty = true;
input_tf->config_cache.cached = false;
input_tf->cache_info.cm_gamma_type = input_tf->cm_gamma_type;
input_tf->cache_info.tf = color_input_tf;
input_tf->cache_info.x_scale = x_scale;
input_tf->cache_info.y_scale = y_scale;
input_tf->cache_info.y_bias = y_bias;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) {
// reset the cache status and mark as dirty to let hw layer to re-cache
input_tf->dirty[i] = true;
input_tf->config_cache[i].cached = false;
input_tf->cache_info[i].cm_gamma_type = input_tf->cm_gamma_type;
input_tf->cache_info[i].tf = color_input_tf;
input_tf->cache_info[i].x_scale = x_scale;
input_tf->cache_info[i].y_scale = y_scale;
input_tf->cache_info[i].y_bias = y_bias;
}
}
}
return ret;
@ -673,13 +681,22 @@ enum vpe_status vpe_color_update_3dlut(
if (!enable_3dlut) {
stream_ctx->lut3d_func->state.bits.initialized = 0;
} else {
if (vpe_priv->init.debug.disable_lut_caching ||
(stream_ctx->lut3d_func->cache_info.uid_3dlut != stream_ctx->stream.tm_params.UID)) {
bool update = false;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++)
if (vpe_priv->init.debug.disable_lut_caching ||
(stream_ctx->lut3d_func->cache_info[i].uid_3dlut !=
stream_ctx->stream.tm_params.UID))
update = true;
if (update) {
vpe_convert_to_tetrahedral(
vpe_priv, stream_ctx->stream.tm_params.lut_data, stream_ctx->lut3d_func);
stream_ctx->lut3d_func->dirty = true;
stream_ctx->lut3d_func->config_cache.cached = false;
stream_ctx->lut3d_func->cache_info.uid_3dlut = stream_ctx->stream.tm_params.UID;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) {
stream_ctx->lut3d_func->dirty[i] = true;
stream_ctx->lut3d_func->config_cache[i].cached = false;
stream_ctx->lut3d_func->cache_info[i].uid_3dlut = stream_ctx->stream.tm_params.UID;
}
}
stream_ctx->lut3d_func->state.bits.initialized = 1;
}
@ -812,10 +829,12 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_
}
// right now shaper is always programmed with linear, once cached, it is always reused.
if (vpe_priv->init.debug.disable_lut_caching ||
(shaper_func && shaper_func->cache_info.tf != tf)) {
// if the caching has the required data cached, skip the update
update = true;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) {
if (vpe_priv->init.debug.disable_lut_caching ||
(shaper_func && shaper_func->cache_info[i].tf != tf)) {
// if the caching has the required data cached, skip the update
update = true;
}
}
shaper_func->type = TF_TYPE_HWPWL;
@ -829,9 +848,11 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_
ret = vpe_build_shaper(&shaper_in, &shaper_func->pwl);
if (ret == VPE_STATUS_OK) {
shaper_func->dirty = true;
shaper_func->config_cache.cached = false;
shaper_func->cache_info.tf = tf;
for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) {
shaper_func->dirty[i] = true;
shaper_func->config_cache[i].cached = false;
shaper_func->cache_info[i].tf = tf;
}
}
}
return ret;

View file

@ -61,6 +61,7 @@ struct cdc_funcs {
/* CDC block instance. */
struct cdc {
struct vpe_priv *vpe_priv; /* back-pointer to the owning VPE private context */
struct cdc_funcs *funcs;   /* hw-generation-specific function hooks */
unsigned int inst;         /* instance index, used e.g. to select a per-instance config cache entry */
};
#ifdef __cplusplus

View file

@ -147,6 +147,14 @@ struct transfer_func_distributed_points {
uint16_t x_point_at_y1_blue;
};
/* Snapshot of the parameters a gamma/transfer-function LUT was last generated
 * with. Compared against the current settings to decide whether the LUT
 * points must be regenerated or the cached result can be reused.
 */
struct cache_info {
enum color_transfer_func tf;  /* transfer function last used */
enum cm_type cm_gamma_type;   /* gamma type last used */
struct fixed31_32 x_scale;    /* x-axis scale last used */
struct fixed31_32 y_scale;    /* y-axis scale last used */
struct fixed31_32 y_bias;     /* y-axis bias last used */
};
struct transfer_func {
enum transfer_func_type type;
enum color_transfer_func tf;
@ -161,16 +169,10 @@ struct transfer_func {
};
// the followings are for optimization: skip if no change
bool dirty; /*< indicate this object is updated or not */
struct config_cache config_cache; /*< used by the hw hook layer to do the caching */
bool dirty[MAX_PIPE]; /*< indicate this object is updated or not */
struct config_cache config_cache[MAX_PIPE]; /*< used by the hw hook layer to do the caching */
struct {
enum color_transfer_func tf;
enum cm_type cm_gamma_type;
struct fixed31_32 x_scale;
struct fixed31_32 y_scale;
struct fixed31_32 y_bias;
} cache_info;
struct cache_info cache_info[MAX_PIPE];
};
enum color_white_point_type {
@ -237,12 +239,12 @@ struct vpe_3dlut {
union vpe_3dlut_state state;
// the followings are for optimization: skip if no change
bool dirty; /*< indicate this object is updated or not */
struct config_cache config_cache; /*< used by the hw hook layer to do the caching */
bool dirty[MAX_3DLUT]; /*< indicate this object is updated or not */
struct config_cache config_cache[MAX_3DLUT]; /*< used by the hw hook layer to do the caching */
struct {
uint64_t uid_3dlut; /*< UID for current 3D LUT params */
} cache_info;
} cache_info[MAX_3DLUT];
};
enum vpe_status vpe_color_update_color_space_and_tf(

View file

@ -73,7 +73,6 @@ struct config_cache {
bool cached;
};
/* A macro that helps cache the config packet. It won't cache if it is in bypass mode,
 * as bypass-mode programming is not heavy-lifting work.
*
@ -82,26 +81,28 @@ struct config_cache {
* /param disable_cache a flag that controls whether caching is needed
* /param is_bypass if it is in bypass, it doesn't cache the bypass config
* /param program_func_call the program call that generates the config packet content
* /param inst index to address the config_cache array
*/
#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call) \
#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call, inst) \
{ \
bool use_cache = false; \
\
/* make sure it opens a new config packet */ \
config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \
\
if ((obj_cache) && !disable_cache && (obj_cache)->config_cache.p_buffer && \
(obj_cache)->config_cache.cached && !((obj_cache)->dirty) && !is_bypass) { \
if ((obj_cache) && !disable_cache && (obj_cache)->config_cache[inst].p_buffer && \
(obj_cache)->config_cache[inst].cached && !((obj_cache)->dirty[inst]) && !is_bypass) { \
/* reuse the cache */ \
if (config_writer->buf->size >= (obj_cache)->config_cache.size) { \
if (config_writer->buf->size >= (obj_cache)->config_cache[inst].size) { \
memcpy((void *)(uintptr_t)config_writer->base_cpu_va, \
(obj_cache)->config_cache.p_buffer, \
(size_t)(obj_cache)->config_cache.size); \
(obj_cache)->config_cache[inst].p_buffer, \
(size_t)(obj_cache)->config_cache[inst].size); \
config_writer->buf->cpu_va = \
config_writer->base_cpu_va + (obj_cache)->config_cache.size; \
config_writer->base_cpu_va + (obj_cache)->config_cache[inst].size; \
config_writer->buf->gpu_va = \
config_writer->base_gpu_va + (obj_cache)->config_cache.size; \
config_writer->buf->size -= ((obj_cache)->config_cache.size - sizeof(uint32_t)); \
config_writer->base_gpu_va + (obj_cache)->config_cache[inst].size; \
config_writer->buf->size -= \
((obj_cache)->config_cache[inst].size - sizeof(uint32_t)); \
use_cache = true; \
} \
} \
@ -117,21 +118,21 @@ struct config_cache {
if (!disable_cache && !is_bypass) { \
/* only cache when it is not crossing config packets */ \
if (config_num == (obj_cfg_array)->num_configs) { \
if ((obj_cache)->dirty) { \
if ((obj_cache)->dirty[inst]) { \
uint64_t size = end - start; \
\
if ((obj_cache)->config_cache.size < size) { \
if ((obj_cache)->config_cache.p_buffer) \
vpe_free((obj_cache)->config_cache.p_buffer); \
if ((obj_cache)->config_cache[inst].size < size) { \
if ((obj_cache)->config_cache[inst].p_buffer) \
vpe_free((obj_cache)->config_cache[inst].p_buffer); \
\
(obj_cache)->config_cache.p_buffer = vpe_zalloc((size_t)size); \
if ((obj_cache)->config_cache.p_buffer) { \
memcpy((obj_cache)->config_cache.p_buffer, \
(obj_cache)->config_cache[inst].p_buffer = vpe_zalloc((size_t)size); \
if ((obj_cache)->config_cache[inst].p_buffer) { \
memcpy((obj_cache)->config_cache[inst].p_buffer, \
(void *)(uintptr_t)start, (size_t)size); \
(obj_cache)->config_cache.size = size; \
(obj_cache)->config_cache.cached = true; \
(obj_cache)->config_cache[inst].size = size; \
(obj_cache)->config_cache[inst].cached = true; \
} else { \
(obj_cache)->config_cache.size = 0; \
(obj_cache)->config_cache[inst].size = 0; \
} \
} \
} \
@ -139,7 +140,7 @@ struct config_cache {
} \
} \
if ((obj_cache)) \
(obj_cache)->dirty = false; \
(obj_cache)->dirty[inst] = false; \
}
/* the following macro requires a local variable vpr_priv to be present */

View file

@ -99,6 +99,7 @@ struct dpp_funcs {
/* DPP block instance. */
struct dpp {
struct vpe_priv *vpe_priv; /* back-pointer to the owning VPE private context */
struct dpp_funcs *funcs;   /* hw-generation-specific function hooks */
unsigned int inst;         /* instance index, passed to CONFIG_CACHE to address per-instance cache entries */
struct pwl_params degamma_params; /* scratch PWL parameters for degamma programming */
};

View file

@ -26,6 +26,11 @@
#include "fixed31_32.h"
#define MAX_3DLUT 1
#define MAX_PIPE 2
#define MAX_OUTPUT_PIPE 1
#ifdef __cplusplus
extern "C" {
#endif

View file

@ -171,6 +171,7 @@ struct mpc_funcs {
struct mpc {
struct vpe_priv *vpe_priv;
struct mpc_funcs *funcs;
unsigned int inst;
struct pwl_params regamma_params;
struct pwl_params blender_params;
struct pwl_params shaper_params;

View file

@ -121,6 +121,7 @@ struct opp_funcs {
/* OPP block instance. */
struct opp {
struct vpe_priv *vpe_priv; /* back-pointer to the owning VPE private context */
struct opp_funcs *funcs;   /* hw-generation-specific function hooks */
unsigned int inst;         /* instance index, used e.g. to select a per-instance config cache entry */
};
#ifdef __cplusplus

View file

@ -32,6 +32,7 @@
#include "mpc.h"
#include "opp.h"
#include "vector.h"
#include "hw_shared.h"
#ifdef __cplusplus
extern "C" {
@ -41,8 +42,6 @@ struct vpe_priv;
struct vpe_cmd_info;
struct segment_ctx;
#define MAX_PIPE 2
#define MAX_OUTPUT_PIPE 2
#define MIN_VPE_CMD 1024
enum vpe_cmd_ops;

View file

@ -199,7 +199,8 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv)
for (i = 0; i < vpe_priv->num_streams; i++) {
ctx = &vpe_priv->stream_ctx[i];
if (ctx->input_tf) {
CONFIG_CACHE_FREE(ctx->input_tf->config_cache);
for (int j = 0; j < MAX_PIPE; j++)
CONFIG_CACHE_FREE(ctx->input_tf->config_cache[j]);
vpe_free(ctx->input_tf);
ctx->input_tf = NULL;
}
@ -220,19 +221,22 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv)
}
if (ctx->in_shaper_func) {
CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache);
for (int j = 0; j < MAX_PIPE; j++)
CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache[j]);
vpe_free(ctx->in_shaper_func);
ctx->in_shaper_func = NULL;
}
if (ctx->blend_tf) {
CONFIG_CACHE_FREE(ctx->blend_tf->config_cache);
for (int j = 0; j < MAX_PIPE; j++)
CONFIG_CACHE_FREE(ctx->blend_tf->config_cache[j]);
vpe_free(ctx->blend_tf);
ctx->blend_tf = NULL;
}
if (ctx->lut3d_func) {
CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache);
for (int j = 0; j < MAX_3DLUT; j++)
CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache[j]);
vpe_free(ctx->lut3d_func);
ctx->lut3d_func = NULL;
}
@ -264,6 +268,7 @@ void vpe_pipe_reset(struct vpe_priv *vpe_priv)
for (i = 0; i < vpe_priv->num_pipe; i++) {
pipe_ctx = &vpe_priv->pipe_ctx[i];
pipe_ctx->pipe_idx = i;
pipe_ctx->is_top_pipe = true;
pipe_ctx->owner = PIPE_CTX_NO_OWNER;
pipe_ctx->top_pipe_idx = 0xff;