From 2e46c414481269c2f928da6d2e439d9cf895febd Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Tue, 25 Feb 2025 16:51:21 +0800 Subject: [PATCH] amd/gmlib: add gmlib for radeonsi radeonsi drivers can use gmlib to generate 3dlut used to do tonemapping. Signed-off-by: Peyton Lee Part-of: --- src/amd/gmlib/README.md | 1 + .../ToneMapGenerator/inc/ToneMapGenerator.h | 45 + .../gmlib/ToneMapGenerator/inc/ToneMapTypes.h | 73 + .../ToneMapGenerator/src/inc/AGMGenerator.h | 39 + .../ToneMapGenerator/src/inc/CSCGenerator.h | 176 ++ .../ToneMapGenerator/src/src/AGMGenerator.c | 261 +++ .../src/src/ToneMapGenerator.c | 354 ++++ src/amd/gmlib/gm/cs_funcs.c | 1418 ++++++++++++++++ src/amd/gmlib/gm/cs_funcs.h | 273 +++ src/amd/gmlib/gm/csc_api_funcs.c | 75 + src/amd/gmlib/gm/csc_api_funcs.h | 41 + src/amd/gmlib/gm/csc_funcs.c | 56 + src/amd/gmlib/gm/csc_funcs.h | 41 + src/amd/gmlib/gm/cvd_api_funcs.c | 85 + src/amd/gmlib/gm/cvd_api_funcs.h | 42 + src/amd/gmlib/gm/cvd_funcs.c | 132 ++ src/amd/gmlib/gm/cvd_funcs.h | 57 + src/amd/gmlib/gm/gm_api_funcs.c | 194 +++ src/amd/gmlib/gm/gm_api_funcs.h | 79 + src/amd/gmlib/gm/gm_funcs.c | 1492 +++++++++++++++++ src/amd/gmlib/gm/gm_funcs.h | 299 ++++ src/amd/gmlib/gm/mat_funcs.c | 918 ++++++++++ src/amd/gmlib/gm/mat_funcs.h | 143 ++ src/amd/gmlib/meson.build | 65 + src/amd/gmlib/tonemap_adaptor.c | 78 + src/amd/gmlib/tonemap_adaptor.h | 33 + src/amd/meson.build | 1 + 27 files changed, 6471 insertions(+) create mode 100755 src/amd/gmlib/README.md create mode 100755 src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h create mode 100755 src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h create mode 100755 src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h create mode 100755 src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h create mode 100755 src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c create mode 100755 src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c create mode 100755 src/amd/gmlib/gm/cs_funcs.c create mode 100755 
src/amd/gmlib/gm/cs_funcs.h create mode 100755 src/amd/gmlib/gm/csc_api_funcs.c create mode 100755 src/amd/gmlib/gm/csc_api_funcs.h create mode 100755 src/amd/gmlib/gm/csc_funcs.c create mode 100755 src/amd/gmlib/gm/csc_funcs.h create mode 100755 src/amd/gmlib/gm/cvd_api_funcs.c create mode 100755 src/amd/gmlib/gm/cvd_api_funcs.h create mode 100755 src/amd/gmlib/gm/cvd_funcs.c create mode 100755 src/amd/gmlib/gm/cvd_funcs.h create mode 100755 src/amd/gmlib/gm/gm_api_funcs.c create mode 100755 src/amd/gmlib/gm/gm_api_funcs.h create mode 100755 src/amd/gmlib/gm/gm_funcs.c create mode 100755 src/amd/gmlib/gm/gm_funcs.h create mode 100755 src/amd/gmlib/gm/mat_funcs.c create mode 100755 src/amd/gmlib/gm/mat_funcs.h create mode 100755 src/amd/gmlib/meson.build create mode 100755 src/amd/gmlib/tonemap_adaptor.c create mode 100755 src/amd/gmlib/tonemap_adaptor.h diff --git a/src/amd/gmlib/README.md b/src/amd/gmlib/README.md new file mode 100755 index 00000000000..8f84f009122 --- /dev/null +++ b/src/amd/gmlib/README.md @@ -0,0 +1 @@ +# GMLib \ No newline at end of file diff --git a/src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h new file mode 100755 index 00000000000..228ec295889 --- /dev/null +++ b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h @@ -0,0 +1,45 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. 
+ * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#pragma once +#include "ToneMapTypes.h" +#include "AGMGenerator.h" + +struct SrcTmParams { + struct ToneMapHdrMetaData streamMetaData; + enum ToneMapTransferFunction inputContainerGamma; +}; + +struct DstTmParams { + struct ToneMapHdrMetaData dstMetaData; + enum ToneMapTransferFunction outputContainerGamma; + enum ToneMapColorPrimaries outputContainerPrimaries; +}; + +struct ToneMapGenerator { + struct AGMGenerator agmGenerator; + enum ToneMapAlgorithm tmAlgo; + bool memAllocSet; + struct SrcTmParams cachedSrcTmParams; + struct DstTmParams cachedDstTmParams; +}; + +enum TMGReturnCode ToneMapGenerator_GenerateToneMappingParameters( + struct ToneMapGenerator* p_tmGenerator, + const struct ToneMapHdrMetaData* streamMetaData, + const struct ToneMapHdrMetaData* dstMetaData, + enum ToneMapTransferFunction inputContainerGamma, + enum ToneMapTransferFunction outputContainerGamma, + enum ToneMapColorPrimaries outputContainerPrimaries, + unsigned short lutDim, + struct ToneMappingParameters* tmParams); + +enum TMGReturnCode ToneMapGenerator_SetInternalAllocators( + struct ToneMapGenerator* p_tmGenerator, + TMGAlloc allocFunc, + TMGFree freeFunc, + void* memCtx); diff --git a/src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h new file mode 100755 index 00000000000..ad08b6942b4 --- /dev/null +++ b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h @@ -0,0 +1,73 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. 
+ * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#pragma once +#include + +#define MAX_LUMINANCE 10000.0 +#define INPUT_NORMALIZATION_FACTOR 4000 //nits +typedef void* (*TMGAlloc)(unsigned int, void*); +typedef void (*TMGFree)(void*, void*); + +struct ToneMapHdrMetaData +{ + unsigned short redPrimaryX; + unsigned short redPrimaryY; + unsigned short greenPrimaryX; + unsigned short greenPrimaryY; + unsigned short bluePrimaryX; + unsigned short bluePrimaryY; + unsigned short whitePointX; + unsigned short whitePointY; + unsigned int maxMasteringLuminance; + unsigned int minMasteringLuminance; + unsigned short maxContentLightLevel; + unsigned short maxFrameAverageLightLevel; +}; + +enum ToneMapTransferFunction { + TMG_TF_SRGB, + TMG_TF_BT709, + TMG_TF_G24, + TMG_TF_PQ, + TMG_TF_NormalizedPQ, + TMG_TF_ModifiedPQ, + TMG_TF_Linear, + TMG_TF_HLG +}; + +enum ToneMapColorPrimaries { + TMG_CP_BT601, + TMG_CP_BT709, + TMG_CP_BT2020, + TMG_CP_DCIP3 +}; + +enum ToneMapAlgorithm { + TMG_A_AGM, + TMG_A_BT2390, + TMG_A_BT2390_4 +}; + +struct ToneMappingParameters { + enum ToneMapColorPrimaries lutColorIn; + enum ToneMapColorPrimaries lutColorOut; + enum ToneMapTransferFunction shaperTf; + enum ToneMapTransferFunction lutOutTf; + unsigned short lutDim; + unsigned short* lutData; + void* formattedLutData; + unsigned short inputNormalizationFactor; +}; + +enum TMGReturnCode { + TMG_RET_OK, + TMG_RET_ERROR_DUPLICATE_INIT, + TMG_RET_ERROR_INVALID_PARAM, + TMG_RET_ERROR_NOT_INITIALIZED, + TMG_RET_ERROR_GMLIB +}; diff --git a/src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h b/src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h new file mode 100755 index 00000000000..031d54219d5 --- /dev/null +++ b/src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h @@ -0,0 +1,39 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. 
+ * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#pragma once +#include "ToneMapTypes.h" +#include "gm_api_funcs.h" + +/* Replace CPP class: AGMGenerator */ +struct AGMGenerator { + TMGAlloc allocFunc; + TMGFree freeFunc; + void* memoryContext; + bool initalized; + struct s_gamut_map agmParams; + struct s_gm_opts gamutMapParams; +}; + +enum TMGReturnCode AGMGenerator_ApplyToneMap( + struct AGMGenerator* p_agm_generator, + const struct ToneMapHdrMetaData* streamMetaData, + const struct ToneMapHdrMetaData* dtMetaData, + const enum ToneMapAlgorithm tmAlgorithm, + const struct ToneMappingParameters* tmParams, + bool updateSrcParams, + bool updateDstParams, + bool enableMerge3DLUT); + +enum TMGReturnCode AGMGenerator_SetGMAllocator( + struct AGMGenerator* p_agm_generator, + TMGAlloc allocFunc, + TMGFree freeFunc, + void* memCtx); + +/* Replace ~AGMGenerator() */ +void AGMGenerator_Exit(struct AGMGenerator* p_agm_generator); diff --git a/src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h b/src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h new file mode 100755 index 00000000000..9c0b795c564 --- /dev/null +++ b/src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h @@ -0,0 +1,176 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. 
+ * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#pragma once +#include "csc_api_funcs.h" +#include "ToneMapTypes.h" + +static bool TranslateTfEnum( + enum ToneMapTransferFunction inTf, + enum cs_gamma_type* outTf) +{ + + switch (inTf) { + case(TMG_TF_SRGB): + *outTf = EGT_sRGB; + break; + case(TMG_TF_BT709): + *outTf = EGT_709; + break; + case(TMG_TF_G24): + *outTf = EGT_2_4; + break; + case(TMG_TF_HLG): + *outTf = EGT_HLG; + break; + case(TMG_TF_NormalizedPQ): + case(TMG_TF_PQ): + *outTf = EGT_PQ; + break; + default: + return false; + } + return true; +} + +static void CSCCtor(struct s_csc_map* csc_map) +{ + csc_ctor(csc_map); +} + +static enum TMGReturnCode CSCSetOptions( + const struct ToneMapHdrMetaData* srcMetaData, + enum ToneMapTransferFunction inTf, + const struct ToneMapHdrMetaData* dstMetaData, + enum ToneMapTransferFunction outTf, + const struct ToneMappingParameters* tmParams, + bool merge3DLUT, + struct s_csc_api_opts* csc_opts) +{ + + enum TMGReturnCode ret = TMG_RET_OK; + enum cs_gamma_type inGamma; + enum cs_gamma_type outGamma; + + if (!TranslateTfEnum(inTf, &inGamma)) { + ret = TMG_RET_ERROR_INVALID_PARAM; + goto exit; + } + + if(!TranslateTfEnum(outTf, &outGamma)) { + ret = TMG_RET_ERROR_INVALID_PARAM; + goto exit; + } + + csc_opts->ptr_3dlut_rgb = tmParams->lutData; + csc_opts->num_pnts_3dlut = tmParams->lutDim; + csc_opts->bitwidth_3dlut = 12; + csc_opts->en_merge_3dlut = merge3DLUT; + + + csc_opts->cs_opts_src.color_space_type = ECST_CUSTOM; + csc_opts->cs_opts_src.rgbw_xy[0] = + srcMetaData->redPrimaryX / 50000.0; + csc_opts->cs_opts_src.rgbw_xy[1] = + srcMetaData->redPrimaryY / 50000.0; + csc_opts->cs_opts_src.rgbw_xy[2] = + srcMetaData->greenPrimaryX / 50000.0; + csc_opts->cs_opts_src.rgbw_xy[3] = + srcMetaData->greenPrimaryY / 50000.0; + csc_opts->cs_opts_src.rgbw_xy[4] = + srcMetaData->bluePrimaryX / 50000.0; + csc_opts->cs_opts_src.rgbw_xy[5] = + srcMetaData->bluePrimaryY / 50000.0; + csc_opts->cs_opts_src.rgbw_xy[6] = 
+ srcMetaData->whitePointX / 50000.0; + csc_opts->cs_opts_src.rgbw_xy[7] = + srcMetaData->whitePointY / 50000.0; + + csc_opts->cs_opts_src.gamma_type = inGamma; + csc_opts->cs_opts_src.luminance_limits[0] = 0.0; + csc_opts->cs_opts_src.luminance_limits[1] = + (double)srcMetaData->maxMasteringLuminance; + + if (inTf == TMG_TF_NormalizedPQ) + csc_opts->cs_opts_src.pq_norm = (double)tmParams->inputNormalizationFactor; + else + csc_opts->cs_opts_src.pq_norm = MAX_LUMINANCE; + + + csc_opts->cs_opts_dst.color_space_type = ECST_CUSTOM; + csc_opts->cs_opts_dst.rgbw_xy[0] = + dstMetaData->redPrimaryX / 50000.0; + csc_opts->cs_opts_dst.rgbw_xy[1] = + dstMetaData->redPrimaryY / 50000.0; + csc_opts->cs_opts_dst.rgbw_xy[2] = + dstMetaData->greenPrimaryX / 50000.0; + csc_opts->cs_opts_dst.rgbw_xy[3] = + dstMetaData->greenPrimaryY / 50000.0; + csc_opts->cs_opts_dst.rgbw_xy[4] = + dstMetaData->bluePrimaryX / 50000.0; + csc_opts->cs_opts_dst.rgbw_xy[5] = + dstMetaData->bluePrimaryY / 50000.0; + csc_opts->cs_opts_dst.rgbw_xy[6] = + dstMetaData->whitePointX / 50000.0; + csc_opts->cs_opts_dst.rgbw_xy[7] = + dstMetaData->whitePointY / 50000.0; + + csc_opts->cs_opts_dst.gamma_type = outGamma; + csc_opts->cs_opts_dst.luminance_limits[0] = 0.0; + csc_opts->cs_opts_dst.luminance_limits[1] = + (double)dstMetaData->maxMasteringLuminance; + + if (outTf == TMG_TF_NormalizedPQ) + csc_opts->cs_opts_dst.pq_norm = (double)tmParams->inputNormalizationFactor; + else + csc_opts->cs_opts_dst.pq_norm = MAX_LUMINANCE; + + exit: + return ret; +} + +static void CSCSetDefault(struct s_csc_api_opts* csc_opts) +{ + csc_api_set_def(csc_opts); +} + +static void CSCGenerateMap(struct s_csc_api_opts* csc_opts, struct s_csc_map* csc_map) +{ + csc_api_gen_map(csc_opts, csc_map); +} + +static enum TMGReturnCode CSCGenerate3DLUT(struct s_csc_api_opts* csc_opts, struct s_csc_map* csc_map) +{ + int retcode = csc_api_gen_3dlut(csc_opts, csc_map); + + return retcode ? 
TMG_RET_ERROR_GMLIB : TMG_RET_OK; +} + +static enum TMGReturnCode CSCGenerator_ApplyCSC( + const struct ToneMapHdrMetaData* srcMetaData, + enum ToneMapTransferFunction inTf, + const struct ToneMapHdrMetaData* dstMetaData, + enum ToneMapTransferFunction outTf, + struct ToneMappingParameters* tmParams, + bool enable3DLUTMerge) +{ + struct s_csc_map csc_map; + struct s_csc_api_opts csc_opts; + + CSCCtor(&csc_map); + CSCSetDefault(&csc_opts); + CSCSetOptions(srcMetaData, + inTf, + dstMetaData, + outTf, + tmParams, + enable3DLUTMerge, + &csc_opts); + CSCGenerateMap(&csc_opts, &csc_map); + + return CSCGenerate3DLUT(&csc_opts, &csc_map); +} diff --git a/src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c b/src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c new file mode 100755 index 00000000000..4a002163794 --- /dev/null +++ b/src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c @@ -0,0 +1,261 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#include "AGMGenerator.h" + +// Function declaration +void AGMGenerator_GMCtor(struct AGMGenerator* p_agm_generator); +void AGMGenerator_GMSetDefault(struct AGMGenerator* p_agm_generator); +enum TMGReturnCode AGMGenerator_SetAgmOptions( + struct AGMGenerator* p_agm_generator, + const struct ToneMapHdrMetaData* srcMetaData, + const struct ToneMapHdrMetaData* dstMetaData, + const enum ToneMapAlgorithm tmAlgorithm, + const struct ToneMappingParameters* tmParams, + bool updateSrcParams, + bool updateDstParams, + bool enableMerge3DLUT); +enum TMGReturnCode AGMGenerator_GMGenerateMap(struct AGMGenerator* p_agm_generator); +enum TMGReturnCode AGMGenerator_GMGenerate3DLUT(struct AGMGenerator* p_agm_generator); + +static bool TranslateTfEnum( + enum ToneMapTransferFunction inTf, + enum cs_gamma_type* outTf) +{ + + switch (inTf) { + case(TMG_TF_SRGB): + *outTf = EGT_sRGB; + break; + case(TMG_TF_BT709): + *outTf = EGT_709; + break; + case(TMG_TF_G24): + *outTf = EGT_2_4; + 
break; + case(TMG_TF_HLG): + *outTf = EGT_HLG; + break; + case(TMG_TF_NormalizedPQ): + case(TMG_TF_PQ): + *outTf = EGT_PQ; + break; + default: + return false; + } + return true; +} + +enum TMGReturnCode AGMGenerator_SetGMAllocator( + struct AGMGenerator* p_agm_generator, + TMGAlloc allocFunc, + TMGFree freeFunc, + void* memCtx) +{ + p_agm_generator->allocFunc = allocFunc; + p_agm_generator->freeFunc = freeFunc; + p_agm_generator->memoryContext = memCtx; + return TMG_RET_OK; +} + +enum TMGReturnCode AGMGenerator_ApplyToneMap( + struct AGMGenerator* p_agm_generator, + const struct ToneMapHdrMetaData* streamMetaData, + const struct ToneMapHdrMetaData* dstMetaData, + const enum ToneMapAlgorithm tmAlgorithm, + const struct ToneMappingParameters* tmParams, + bool updateSrcParams, + bool updateDstParams, + bool enableMerge3DLUT) +{ + enum TMGReturnCode ret = TMG_RET_OK; + + if (!p_agm_generator->initalized) { + AGMGenerator_GMCtor(p_agm_generator); + AGMGenerator_GMSetDefault(p_agm_generator); + p_agm_generator->initalized = true; + } + + if ((ret = AGMGenerator_SetAgmOptions( + p_agm_generator, + streamMetaData, + dstMetaData, + tmAlgorithm, + tmParams, + updateSrcParams, + updateDstParams, + enableMerge3DLUT)) != TMG_RET_OK) + goto exit; + + if ((ret = AGMGenerator_GMGenerateMap(p_agm_generator)) != TMG_RET_OK) + goto exit; + + if ((ret = AGMGenerator_GMGenerate3DLUT(p_agm_generator)) != TMG_RET_OK) + goto exit; + +exit: + return ret; +} + +enum TMGReturnCode AGMGenerator_SetAgmOptions( + struct AGMGenerator* p_agm_generator, + const struct ToneMapHdrMetaData* srcMetaData, + const struct ToneMapHdrMetaData* dstMetaData, + const enum ToneMapAlgorithm tmAlgorithm, + const struct ToneMappingParameters* tmParams, + bool updateSrcParams, + bool updateDstParams, + bool enableMerge3DLUT) +{ + enum TMGReturnCode ret = TMG_RET_OK; + enum cs_gamma_type inGamma; + enum cs_gamma_type outGamma; + + if (!TranslateTfEnum(tmParams->shaperTf, &inGamma)) { + ret = 
TMG_RET_ERROR_INVALID_PARAM; + goto exit; + } + + if (!TranslateTfEnum(tmParams->lutOutTf, &outGamma)) { + ret = TMG_RET_ERROR_INVALID_PARAM; + goto exit; + } + + if (tmAlgorithm == TMG_A_AGM) { + p_agm_generator->gamutMapParams.gamut_map_mode = EGMM_TM_CHTO; + p_agm_generator->gamutMapParams.hue_rot_mode = EHRM_HR; + } + else { + p_agm_generator->gamutMapParams.gamut_map_mode = EGMM_TM; + p_agm_generator->gamutMapParams.hue_rot_mode = EHRM_NONE; + } + + p_agm_generator->gamutMapParams.update_msk = updateSrcParams ? GM_UPDATE_SRC : 0; + p_agm_generator->gamutMapParams.update_msk = updateDstParams ? (p_agm_generator->gamutMapParams.update_msk | GM_UPDATE_DST) : p_agm_generator->gamutMapParams.update_msk; + + p_agm_generator->gamutMapParams.ptr_3dlut_rgb = tmParams->lutData; + p_agm_generator->gamutMapParams.num_pnts_3dlut = tmParams->lutDim; + p_agm_generator->gamutMapParams.bitwidth_3dlut = 12; + p_agm_generator->gamutMapParams.en_merge_3dlut = enableMerge3DLUT; + p_agm_generator->gamutMapParams.mode = GM_PQTAB_GBD; + p_agm_generator->gamutMapParams.en_tm_scale_color = 1; + p_agm_generator->gamutMapParams.num_hue_pnts = GM_NUM_HUE; + p_agm_generator->gamutMapParams.num_edge_pnts = GM_NUM_EDGE; + p_agm_generator->gamutMapParams.num_int_pnts = GM_NUM_INT; + p_agm_generator->gamutMapParams.org2_perc_c = GM_ORG2_PERC; + p_agm_generator->gamutMapParams.step_samp = 0.0005; // GM_STEP_SAMP = 0.0001; + p_agm_generator->gamutMapParams.show_pix_mode = ESPM_NONE; + + for (int i = 0; i < GM_NUM_PRIM; i++) { + p_agm_generator->gamutMapParams.vec_org1_factor[i] = gm_vec_org13_factor_def[i][0]; + p_agm_generator->gamutMapParams.vec_org3_factor[i] = gm_vec_org13_factor_def[i][1]; + } + + p_agm_generator->gamutMapParams.cs_opts_src.color_space_type = ECST_CUSTOM; + p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[0] = + srcMetaData->redPrimaryX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[1] = + srcMetaData->redPrimaryY / 50000.0; + 
p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[2] = + srcMetaData->greenPrimaryX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[3] = + srcMetaData->greenPrimaryY / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[4] = + srcMetaData->bluePrimaryX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[5] = + srcMetaData->bluePrimaryY / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[6] = + srcMetaData->whitePointX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_src.rgbw_xy[7] = + srcMetaData->whitePointY / 50000.0; + + p_agm_generator->gamutMapParams.cs_opts_src.gamma_type = inGamma; + p_agm_generator->gamutMapParams.cs_opts_src.luminance_limits[0] = 0; + p_agm_generator->gamutMapParams.cs_opts_src.luminance_limits[1] = + (double)srcMetaData->maxMasteringLuminance; + + if (tmParams->shaperTf == TMG_TF_NormalizedPQ) { + p_agm_generator->gamutMapParams.cs_opts_src.pq_norm = (double)tmParams->inputNormalizationFactor; + } + else { + p_agm_generator->gamutMapParams.cs_opts_src.pq_norm = MAX_LUMINANCE; + } + + + p_agm_generator->gamutMapParams.cs_opts_dst.color_space_type = ECST_CUSTOM; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[0] = + dstMetaData->redPrimaryX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[1] = + dstMetaData->redPrimaryY / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[2] = + dstMetaData->greenPrimaryX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[3] = + dstMetaData->greenPrimaryY / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[4] = + dstMetaData->bluePrimaryX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[5] = + dstMetaData->bluePrimaryY / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[6] = + dstMetaData->whitePointX / 50000.0; + p_agm_generator->gamutMapParams.cs_opts_dst.rgbw_xy[7] = + dstMetaData->whitePointY / 50000.0; + + 
p_agm_generator->gamutMapParams.cs_opts_dst.gamma_type = outGamma; + p_agm_generator->gamutMapParams.cs_opts_dst.mode = 0; + p_agm_generator->gamutMapParams.cs_opts_dst.luminance_limits[0] = 0; + p_agm_generator->gamutMapParams.cs_opts_dst.luminance_limits[1] = + (double)dstMetaData->maxMasteringLuminance; + + if (tmParams->lutOutTf == TMG_TF_NormalizedPQ) { + p_agm_generator->gamutMapParams.cs_opts_dst.pq_norm = (double)tmParams->inputNormalizationFactor; + } + else { + p_agm_generator->gamutMapParams.cs_opts_dst.pq_norm = MAX_LUMINANCE; + } + + // Correct Luminance Bounds if Neccessary + if (p_agm_generator->gamutMapParams.cs_opts_src.luminance_limits[0] > p_agm_generator->gamutMapParams.cs_opts_dst.luminance_limits[0]) { + p_agm_generator->gamutMapParams.cs_opts_src.luminance_limits[0] = p_agm_generator->gamutMapParams.cs_opts_dst.luminance_limits[0]; + p_agm_generator->gamutMapParams.update_msk |= GM_UPDATE_SRC; + } + if (p_agm_generator->gamutMapParams.cs_opts_src.luminance_limits[1] < p_agm_generator->gamutMapParams.cs_opts_dst.luminance_limits[1]) { + p_agm_generator->gamutMapParams.cs_opts_src.luminance_limits[1] = p_agm_generator->gamutMapParams.cs_opts_dst.luminance_limits[1]; + p_agm_generator->gamutMapParams.update_msk |= GM_UPDATE_SRC; + } + +exit: + return ret; +} + +void AGMGenerator_GMSetDefault(struct AGMGenerator* p_agm_generator) +{ + gm_api_set_def(&p_agm_generator->gamutMapParams); +} + +enum TMGReturnCode AGMGenerator_GMGenerateMap(struct AGMGenerator* p_agm_generator) +{ + int retcode = gm_api_gen_map(&p_agm_generator->gamutMapParams, &p_agm_generator->agmParams); + + return retcode ? TMG_RET_ERROR_GMLIB : TMG_RET_OK; +} + +enum TMGReturnCode AGMGenerator_GMGenerate3DLUT(struct AGMGenerator* p_agm_generator) +{ + int retcode = gm_api_gen_3dlut(&p_agm_generator->gamutMapParams, &p_agm_generator->agmParams); + + return retcode ? 
TMG_RET_ERROR_GMLIB : TMG_RET_OK; +} + +void AGMGenerator_GMCtor(struct AGMGenerator* p_agm_generator) +{ + gm_ctor(&p_agm_generator->agmParams, p_agm_generator->allocFunc, p_agm_generator->freeFunc, p_agm_generator->memoryContext); +} + +void AGMGenerator_Exit(struct AGMGenerator* p_agm_generator) +{ + gm_dtor(&p_agm_generator->agmParams); +} \ No newline at end of file diff --git a/src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c b/src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c new file mode 100755 index 00000000000..8677f5d3354 --- /dev/null +++ b/src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c @@ -0,0 +1,354 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#include "ToneMapGenerator.h" +#include "AGMGenerator.h" +#include "CSCGenerator.h" +#include +#include + +/* Defines comes from ColorPrimaryTable.h */ +struct ToneMapHdrMetaData BT2020Container = { + (unsigned short)(0.708 * 50000), (unsigned short)(0.292 * 50000), + (unsigned short)(0.17 * 50000), (unsigned short)(0.797 * 50000), + (unsigned short)(0.131 * 50000), (unsigned short)(0.046 * 50000), + (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000), + (unsigned int)(10000 * 10000), (unsigned int)(0.05 * 10000), + (unsigned short)10000, + (unsigned short)10000 +}; + +struct ToneMapHdrMetaData DCIP3Container = { + (unsigned short)(0.68 * 50000), (unsigned short)(0.32 * 50000), + (unsigned short)(0.265 * 50000), (unsigned short)(0.69 * 50000), + (unsigned short)(0.15 * 50000), (unsigned short)(0.06 * 50000), + (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000), + (unsigned int)(10000 * 10000), (unsigned int)(0.05 * 10000), + (unsigned short)10000, + (unsigned short)10000 +}; + +struct ToneMapHdrMetaData BT709Container = { + (unsigned short)(0.64 * 50000), (unsigned short)(0.33 * 50000), + (unsigned short)(0.30 * 50000), (unsigned short)(0.60 * 50000), + (unsigned short)(0.15 * 
50000), (unsigned short)(0.06 * 50000), + (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000), + (unsigned int)(10000 * 10000), (unsigned int)(0.05 * 10000), + (unsigned short)10000, + (unsigned short)10000 +}; + +struct ToneMapHdrMetaData BT601Container = { + (unsigned short)(0.63 * 50000), (unsigned short)(0.34 * 50000), + (unsigned short)(0.31 * 50000), (unsigned short)(0.595 * 50000), + (unsigned short)(0.155 * 50000), (unsigned short)(0.07 * 50000), + (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000), + (unsigned int)(10000 * 10000), (unsigned int)(0.05 * 10000), + (unsigned short)10000, + (unsigned short)10000 +}; + + +//Function declaration +enum ToneMapColorPrimaries ToneMapGenerator_GetLutColorIn(void); +enum ToneMapColorPrimaries ToneMapGenerator_GetLutColorOut( + enum ToneMapTransferFunction outputContainerGamma, + enum ToneMapColorPrimaries outputContainerPrimaries); +enum ToneMapTransferFunction ToneMapGenerator_GetShaperTf( + enum ToneMapTransferFunction inputContainerGamma); +enum ToneMapTransferFunction ToneMapGenerator_GetLutOutTf( + enum ToneMapTransferFunction outputContainerGamma, + enum ToneMapColorPrimaries outputContainerPrimaries); +unsigned short ToneMapGenerator_GetInputNormFactor( + const struct ToneMapHdrMetaData* streamMetaData); +bool ToneMapGenerator_CacheSrcTmParams( + struct ToneMapGenerator* p_tmGenerator, + const struct ToneMapHdrMetaData* streamMetaData, + enum ToneMapTransferFunction inputContainerGamma); +bool ToneMapGenerator_CacheDstTmParams( + struct ToneMapGenerator* p_tmGenerator, + const struct ToneMapHdrMetaData* dstMetaData, + enum ToneMapTransferFunction outputContainerGamma, + enum ToneMapColorPrimaries outputContainerPrimaries); +enum TMGReturnCode ToneMapGenerator_GenerateLutData( + struct ToneMapGenerator* p_tmGenerator, + const struct ToneMapHdrMetaData* streamMetaData, + const struct ToneMapHdrMetaData* dstMetaData, + enum ToneMapAlgorithm tmAlgorithm, + bool updateSrcParams, + 
bool updateDstParams, + struct ToneMappingParameters* tmParams); +struct ToneMapHdrMetaData ToneMapGenerator_GetColorContainerData( + enum ToneMapColorPrimaries containerColor); +bool ToneMapGenerator_ContentEqualsContainer( + const struct ToneMapHdrMetaData* contentMetaData, + const struct ToneMapHdrMetaData* containerPrimaries); + + +enum TMGReturnCode ToneMapGenerator_GenerateToneMappingParameters( + struct ToneMapGenerator* p_tmGenerator, + const struct ToneMapHdrMetaData* streamMetaData, + const struct ToneMapHdrMetaData* dstMetaData, + enum ToneMapTransferFunction inputContainerGamma, + enum ToneMapTransferFunction outputContainerGamma, + enum ToneMapColorPrimaries outputContainerPrimaries, + unsigned short lutDim, + struct ToneMappingParameters* tmParams) +{ + + enum TMGReturnCode ret = TMG_RET_OK; + bool updateSrcParams; + bool updateDstParams; + + if (!p_tmGenerator->memAllocSet) { + ret = TMG_RET_ERROR_NOT_INITIALIZED; + goto exit; + } + + tmParams->lutOutTf = ToneMapGenerator_GetLutOutTf(outputContainerGamma, outputContainerPrimaries); + tmParams->lutColorIn = ToneMapGenerator_GetLutColorIn(); + tmParams->lutColorOut = ToneMapGenerator_GetLutColorOut(outputContainerGamma, outputContainerPrimaries); + tmParams->shaperTf = ToneMapGenerator_GetShaperTf(inputContainerGamma); + tmParams->formattedLutData = NULL; + tmParams->lutDim = lutDim; + tmParams->inputNormalizationFactor = ToneMapGenerator_GetInputNormFactor(streamMetaData); + + updateSrcParams = ToneMapGenerator_CacheSrcTmParams(p_tmGenerator, streamMetaData, inputContainerGamma); + updateDstParams = ToneMapGenerator_CacheDstTmParams(p_tmGenerator, dstMetaData, outputContainerGamma, outputContainerPrimaries); + + ret = ToneMapGenerator_GenerateLutData(p_tmGenerator, streamMetaData, dstMetaData, p_tmGenerator->tmAlgo, updateSrcParams, updateDstParams, tmParams); + +exit: + return ret; +} + +enum ToneMapColorPrimaries ToneMapGenerator_GetLutColorIn() +{ + return TMG_CP_BT2020; +} + +enum 
ToneMapColorPrimaries ToneMapGenerator_GetLutColorOut(
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries outputContainerPrimaries)
+{
+    /* A linear output container always uses BT.2020 LUT output primaries;
+     * every other gamma keeps the container's own primaries. */
+    enum ToneMapColorPrimaries lutOutPrimaries;
+
+    if (outputContainerGamma == TMG_TF_Linear)
+        lutOutPrimaries = TMG_CP_BT2020;
+    else
+        lutOutPrimaries = outputContainerPrimaries;
+
+    return lutOutPrimaries;
+}
+
+/*
+ * Select the shaper transfer function for the given input container gamma:
+ * PQ and Linear inputs are shaped with TMG_TF_NormalizedPQ, any other value
+ * is passed through unchanged.
+ */
+enum ToneMapTransferFunction ToneMapGenerator_GetShaperTf(
+    enum ToneMapTransferFunction inputContainerGamma)
+{
+    enum ToneMapTransferFunction shaperTf;
+
+    switch (inputContainerGamma) {
+    case(TMG_TF_PQ):
+    case(TMG_TF_Linear):
+        shaperTf = TMG_TF_NormalizedPQ;
+        break;
+    default:
+        shaperTf = inputContainerGamma;
+        break;
+    }
+
+    return shaperTf;
+}
+
+/*
+ * Select the transfer function of the LUT output: Linear and PQ containers
+ * both map to TMG_TF_PQ, any other gamma is passed through unchanged.
+ * outputContainerPrimaries is accepted but not consulted.
+ */
+enum ToneMapTransferFunction ToneMapGenerator_GetLutOutTf(
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries outputContainerPrimaries)
+{
+    enum ToneMapTransferFunction lutOutTf;
+
+    if (outputContainerGamma == TMG_TF_Linear ||
+        outputContainerGamma == TMG_TF_PQ)
+        lutOutTf = TMG_TF_PQ;
+    else
+        lutOutTf = outputContainerGamma;
+
+    return lutOutTf;
+}
+
+/*
+ * Return a copy of the predefined container metadata for containerColor.
+ * Values outside the enum fall back to the BT.2020 container.
+ */
+struct ToneMapHdrMetaData ToneMapGenerator_GetColorContainerData(enum ToneMapColorPrimaries containerColor) {
+
+    switch (containerColor) {
+    case (TMG_CP_BT601):
+        return BT601Container;
+        break;
+    case (TMG_CP_BT709):
+        return BT709Container;
+        break;
+    case (TMG_CP_BT2020):
+        return BT2020Container;
+        break;
+    case (TMG_CP_DCIP3):
+        return DCIP3Container;
+        break;
+    default:
+        return BT2020Container;
+        break;
+    }
+
+}
+
+/*
+ * Compute the input normalization factor from the stream metadata:
+ * max(maxMasteringLuminance, INPUT_NORMALIZATION_FACTOR), saturated to the
+ * range of the unsigned short return type.
+ */
+unsigned short ToneMapGenerator_GetInputNormFactor(const struct ToneMapHdrMetaData* streamMetaData) {
+
+    /* 0xFFFF is representable in every conforming unsigned short. */
+    const unsigned int maxNormFactor = 0xFFFFu;
+    unsigned int normFactor = streamMetaData->maxMasteringLuminance;
+
+    /*
+     * maxMasteringLuminance is an unsigned int. Assigning it straight to an
+     * unsigned short would wrap for values above 0xFFFF (e.g. 65536 -> 0),
+     * producing a factor below the intended minimum, so clamp explicitly.
+     */
+    if (normFactor < INPUT_NORMALIZATION_FACTOR)
+        normFactor = INPUT_NORMALIZATION_FACTOR;
+    else if (normFactor > maxNormFactor)
+        normFactor = maxNormFactor;
+
+    return (unsigned short)normFactor;
+}
+
+bool ToneMapGenerator_ContentEqualsContainer(
+    const struct ToneMapHdrMetaData* contentMetaData,
+    const struct ToneMapHdrMetaData* containerPrimaries)
+{
+    /*
+     * Returns true when the red, green and blue chromaticity coordinates of
+     * the content each differ from the container's by less than 2 code
+     * values. White point and luminance fields are not compared.
+     *
+     * The operands are unsigned short and promote to int, so the
+     * subtraction can go negative and abs() is well defined.
+     *
+     * The first term previously compared the content's *blue* X against the
+     * container's red X, so content whose primaries matched the container
+     * was still reported as different.
+     */
+    if (abs(contentMetaData->redPrimaryX - containerPrimaries->redPrimaryX) < 2 &&
+        abs(contentMetaData->redPrimaryY - containerPrimaries->redPrimaryY) < 2 &&
+        abs(contentMetaData->greenPrimaryX - containerPrimaries->greenPrimaryX) < 2 &&
+        abs(contentMetaData->greenPrimaryY - containerPrimaries->greenPrimaryY) < 2 &&
+        abs(contentMetaData->bluePrimaryX - containerPrimaries->bluePrimaryX) < 2 &&
+        abs(contentMetaData->bluePrimaryY - containerPrimaries->bluePrimaryY) < 2)
+        return true;
+    else
+        return false;
+}
+
+/*
+    Tone map generation consists of three steps:
+    1. Container to content color space conversion.
+    2. Tone mapping and gamut mapping operation.
+    3. Content to output container color space conversion.
+
+    These operations are cascaded one after the other. The enable3DLUTMerge will tell each module
+    whether or not to start from scratch, or use the previous block's output as the next block's input.
+
+    The terminology "Content Color Space / Container Color Space" is used to distinguish
+    between the color volume of the content and the color volume of the container.
+    For example, the content color volume might be DCIP3 and the Container might be BT2020.
+    CSC step changes the representation of the content to align with its color volume.
+*/
+/*
+ * Returns TMG_RET_OK when every executed stage succeeded, otherwise the
+ * status of the first stage that failed.
+ *
+ * Later stages are still attempted after a failure, exactly as before:
+ * updateSrcParams/updateDstParams are only handed to
+ * AGMGenerator_ApplyToneMap here, so skipping that call would drop them.
+ */
+enum TMGReturnCode ToneMapGenerator_GenerateLutData(
+    struct ToneMapGenerator* p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData,
+    const struct ToneMapHdrMetaData* dstMetaData,
+    enum ToneMapAlgorithm tmAlgorithm,
+    bool updateSrcParams,
+    bool updateDstParams,
+    struct ToneMappingParameters* tmParams)
+{
+
+    enum TMGReturnCode ret = TMG_RET_OK;
+    enum TMGReturnCode stageRet;
+    bool enable3DLUTMerge = false;
+    struct ToneMapHdrMetaData lutContainer = ToneMapGenerator_GetColorContainerData(tmParams->lutColorIn);
+
+    /* Step 1: input container -> content, only when the primaries differ. */
+    if (!ToneMapGenerator_ContentEqualsContainer(streamMetaData, &lutContainer)) {
+        lutContainer.maxMasteringLuminance = streamMetaData->maxMasteringLuminance;
+        lutContainer.minMasteringLuminance = streamMetaData->minMasteringLuminance;
+
+        stageRet = CSCGenerator_ApplyCSC(
+            &lutContainer,
+            tmParams->shaperTf,
+            streamMetaData,
+            tmParams->shaperTf,
+            tmParams,
+            enable3DLUTMerge);
+        if (ret == TMG_RET_OK)
+            ret = stageRet;
+
+        enable3DLUTMerge = true;
+    }
+
+    /* Step 2: tone/gamut mapping; merges with step 1 when that step ran. */
+    stageRet = AGMGenerator_ApplyToneMap(
+        &p_tmGenerator->agmGenerator,
+        streamMetaData,
+        dstMetaData,
+        tmAlgorithm,
+        tmParams,
+        updateSrcParams,
+        updateDstParams,
+        enable3DLUTMerge);
+    if (ret == TMG_RET_OK)
+        ret = stageRet;
+
+    enable3DLUTMerge = true;
+
+    /* Step 3: content -> output container, only when the primaries differ. */
+    lutContainer = ToneMapGenerator_GetColorContainerData(tmParams->lutColorOut);
+    if (!ToneMapGenerator_ContentEqualsContainer(dstMetaData, &lutContainer)) {
+        lutContainer.maxMasteringLuminance = dstMetaData->maxMasteringLuminance;
+        lutContainer.minMasteringLuminance = dstMetaData->minMasteringLuminance;
+
+        stageRet = CSCGenerator_ApplyCSC(
+            dstMetaData,
+            tmParams->lutOutTf,
+            &lutContainer,
+            tmParams->lutOutTf,
+            tmParams,
+            enable3DLUTMerge
+        );
+        if (ret == TMG_RET_OK)
+            ret = stageRet;
+    }
+
+    return ret;
+}
+
+/*
+ * Compare the incoming source parameters with the cached copy and refresh
+ * the cache when they differ. Returns true when an update happened.
+ */
+bool ToneMapGenerator_CacheSrcTmParams(
+    struct ToneMapGenerator* p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData,
+    enum ToneMapTransferFunction inputContainerGamma)
+{
+    bool updateSrcParams = memcmp(streamMetaData, &p_tmGenerator->cachedSrcTmParams.streamMetaData, sizeof(struct ToneMapHdrMetaData)) ||
+        inputContainerGamma != p_tmGenerator->cachedSrcTmParams.inputContainerGamma;
+
+    if
(updateSrcParams) { + memcpy(&p_tmGenerator->cachedSrcTmParams.streamMetaData, streamMetaData, sizeof(struct ToneMapHdrMetaData)); + p_tmGenerator->cachedSrcTmParams.inputContainerGamma = inputContainerGamma; + } + + return updateSrcParams; +} + +bool ToneMapGenerator_CacheDstTmParams( + struct ToneMapGenerator* p_tmGenerator, + const struct ToneMapHdrMetaData* dstMetaData, + enum ToneMapTransferFunction outputContainerGamma, + enum ToneMapColorPrimaries outputContainerPrimaries) +{ + bool updateDstParams = memcmp(dstMetaData, &p_tmGenerator->cachedDstTmParams.dstMetaData, sizeof(struct ToneMapHdrMetaData)) || + outputContainerGamma != p_tmGenerator->cachedDstTmParams.outputContainerGamma || + outputContainerPrimaries != p_tmGenerator->cachedDstTmParams.outputContainerPrimaries; + + if (updateDstParams){ + memcpy(&p_tmGenerator->cachedDstTmParams.dstMetaData, dstMetaData, sizeof(struct ToneMapHdrMetaData)); + p_tmGenerator->cachedDstTmParams.outputContainerGamma = outputContainerGamma; + p_tmGenerator->cachedDstTmParams.outputContainerPrimaries = outputContainerPrimaries; + p_tmGenerator->cachedDstTmParams.outputContainerPrimaries = outputContainerPrimaries; + } + + return updateDstParams; +} + +enum TMGReturnCode ToneMapGenerator_SetInternalAllocators( + struct ToneMapGenerator* p_tmGenerator, + TMGAlloc allocFunc, + TMGFree freeFunc, + void* memCtx) +{ + enum TMGReturnCode ret = AGMGenerator_SetGMAllocator( + &p_tmGenerator->agmGenerator, + allocFunc, + freeFunc, + memCtx); + + p_tmGenerator->memAllocSet = true; + + return ret; +} \ No newline at end of file diff --git a/src/amd/gmlib/gm/cs_funcs.c b/src/amd/gmlib/gm/cs_funcs.c new file mode 100755 index 00000000000..4ae443a162a --- /dev/null +++ b/src/amd/gmlib/gm/cs_funcs.c @@ -0,0 +1,1418 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name : cs_funcs.c
+ * Purpose : Color Space functions
+ * Author : Vladimir Lachine (vlachine@amd.com)
+ * Date : September 20, 2023
+ * Version : 1.4
+ *----------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "cs_funcs.h"
+
+static const MATFLOAT cs_vec_gamma[EGT_CUSTOM][4] = {
+	/* One row per predefined enum cs_gamma_type, returned by cs_get_gamma().
+	 * cs_gamma() treats c1 == 0.0 as PQ and c1 == 0.5 as HLG; any other row is
+	 * the piecewise law  nonlin = (lin < c4) ? lin * c3 : c1 * lin^c2 + 1 - c1.
+	 *
+	 * c1 c2 c3 c4 */
+	{1.0000, 1.00, 0.00, 0.000}, /* linear */
+	{1.0990, 0.45, 4.50, 0.018}, /* 709 (SD/HD) */
+	{1.0000, 1.0 / 2.1992, 0.0, 0.0}, /* Adobe RGB 1998 */
+	{1.0000, 1.0 / 2.6, 0.0, 0.0}, /* DCI-P3 (SMPTE-231-2) */
+	{1.0000, 1.0 / 1.8, 0.0, 0.0}, /* Apple Trinitron */
+	{1.0550, 1.0 / 2.4, 12.92, 0.0031308}, /* sRGB */
+	{0.0000, 0.0, 0.0, 0.0}, /* PQ */
+	{0.5000, 0.0, 0.0, 0.0}, /* HLG */
+	{1.0000, 1.0 / 2.2, 0.0, 0.0}, /* Gamma 2.2 */
+	{1.0000, 1.0 / 2.4, 0.0, 0.0} /* Gamma 2.4 */
+};
+
+static const MATFLOAT cs_vec_color_space[ECST_CUSTOM][8] = {
+	/* One row per predefined enum cs_color_space_type, returned by cs_get_color_space().
+	 * Red (x, y), Green (x,y), Blue (x,y), White (x,y) */
+	{0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.312710, 0.329020}, /* ITU_R BT.709-5/sRGB (HDTV) */
+	{0.6300, 0.3400, 0.3100, 0.5950, 0.1550, 0.0700, 0.312710, 0.329020}, /* SMPTE RP 145 (SDTV) */
+	{0.6400, 0.3300, 0.2100, 0.7100, 0.1500, 0.0600, 0.312710, 0.329020}, /* Adobe RGB (1998) */
+	{0.6800, 0.3200, 0.2650, 0.6900, 0.1500, 0.0600, 0.312710, 0.329020}, /* DCI P3 (SMPTE-231-2) P3D65 */
+/*	{0.6800, 0.3200, 0.2650, 0.6900, 0.1500, 0.0600, 0.314000, 0.351000}, // DCI P3 (SMPTE-231-2) P3D60 */
+/*	{0.6800, 0.3200, 0.2650, 0.6900, 0.1500, 0.0600, 0.314000, 0.351000}, // DCI P3 (SMPTE-231-2) P3DCI */
+	{0.6250, 0.3400, 0.2800, 0.5950, 0.1550, 0.0700, 0.312710, 0.329020}, /* Apple */
+	{0.6400, 0.3300, 0.2900, 0.6000, 0.1500, 0.0600, 0.312710, 0.329020}, /* EBU 3213/ITU (PAL/SECAM) */
+	{0.6700,
0.3300, 0.2100, 0.7100, 0.1400, 0.0800, 0.310100, 0.316200}, /* NTSC 1953 */
+	{0.7350, 0.2650, 0.2740, 0.7170, 0.1660, 0.0090, 0.333300, 0.333300}, /* CIE RGB */
+	{0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.312710, 0.329020} /* BT.2020 */
+};
+
+static MATFLOAT cs_vec_white_point[EWPT_NUM][3] = {
+	/* One row per enum cs_white_point_type, returned by cs_get_white_point().
+	 * x, y, z */
+	{1.000000, 1.000000, 1.000000}, /* NONE */
+	{0.447570, 0.407440, 0.144990}, /* A - Tungsten or Incandescent, 2856K */
+	{0.348400, 0.351600, 0.300000}, /* B - Direct Sunlight at Noon, 4874K (obsolete) */
+	{0.310060, 0.316150, 0.373790}, /* C - North Sky Daylight, 6774K */
+	{0.345670, 0.358500, 0.295830}, /* D50 - Daylight, used for Color Rendering, 5000K */
+	{0.332420, 0.347430, 0.320150}, /* D55 - Daylight, used for Photograph, 5500K */
+	{0.312710, 0.329020, 0.358270}, /* D65 - New version of North Sky Daylight, 6504K */
+	{0.299020, 0.314850, 0.386130}, /* D75 - Daylight, 7500K */
+	{0.284800, 0.293200, 0.422000}, /* 9300K - High eff. blue phosphor monitors, 9300K */
+	{0.333330, 0.333330, 0.333340}, /* E - Uniform energy illuminant, 5400K */
+	{0.372070, 0.375120, 0.252810}, /* F2 - Cool White Fluorescent (CWF), 4200K */
+	{0.312850, 0.329180, 0.357970}, /* F7 - Broad-band Daylight Fluorescent, 6500K */
+	{0.380540, 0.376910, 0.242540}, /* F11 - Narrow-band White Fluorescent, 4000K */
+	{0.314000, 0.351000, 0.335000}, /* DCI-P3 */
+	{0.277400, 0.283600, 0.438660} /* 11000K - blue sky, 11000K */
+};
+
+static const MATFLOAT cs_vec_cct_xy[2 * CS_CCT_SIZE] = { /* interleaved (x, y) pairs; cs_cct_to_xy() reads pair (cct - CS_CCT_MIN) / CS_CCT_INC */
+	0.652750, 0.344462, 0.638755, 0.356498, 0.625043, 0.367454, 0.611630, 0.377232, 0.598520, 0.385788, /* 1000 */
+	0.585716, 0.393121, 0.573228, 0.399264, 0.561066, 0.404274, 0.549243, 0.408225, 0.537776, 0.411202,
+	0.526676, 0.413297, 0.515956, 0.414601, 0.505624, 0.415207, 0.495685, 0.415201, 0.486142, 0.414665, /* 2000 */
+	0.476993, 0.413675, 0.468234, 0.412299, 0.459857, 0.410598, 0.451855, 0.408629, 0.444216, 0.406440,
+	0.436929, 0.404073, 0.429981, 0.401566, 0.423358, 0.398951,
0.417046, 0.396255, 0.411032, 0.393503, /* 3000 */ + 0.405302, 0.390715, 0.399841, 0.387907, 0.394638, 0.385095, 0.389677, 0.382291, 0.384948, 0.379505, + 0.380438, 0.376746, 0.376135, 0.374019, 0.372029, 0.371332, 0.368108, 0.368687, 0.364364, 0.366090, /* 4000 */ + 0.360786, 0.363543, 0.357366, 0.361048, 0.354095, 0.358605, 0.350965, 0.356217, 0.347969, 0.353884, + 0.345100, 0.351607, 0.342350, 0.349384, 0.339715, 0.347215, 0.337187, 0.345102, 0.334761, 0.343041, /* 5000 */ + 0.332433, 0.341034, 0.330196, 0.339078, 0.328047, 0.337173, 0.325981, 0.335317, 0.323994, 0.333511, + 0.322082, 0.331752, 0.320241, 0.330039, 0.318468, 0.328371, 0.316760, 0.326747, 0.315113, 0.325166, /* 6000 */ + 0.313524, 0.323626, 0.311992, 0.322127, 0.310513, 0.320667, 0.309085, 0.319245, 0.307705, 0.317860, + 0.306372, 0.316511, 0.305083, 0.315196, 0.303837, 0.313915, 0.302631, 0.312667, 0.301463, 0.311450, /* 7000 */ + 0.300333, 0.310264, 0.299238, 0.309108, 0.298178, 0.307981, 0.297149, 0.306881, 0.296153, 0.305809, + 0.295186, 0.304763, 0.294247, 0.303743, 0.293337, 0.302747, 0.292453, 0.301775, 0.291594, 0.300826, /* 8000 */ + 0.290760, 0.299899, 0.289949, 0.298995, 0.289161, 0.298111, 0.288395, 0.297248, 0.287649, 0.296405, + 0.286924, 0.295581, 0.286218, 0.294776, 0.285531, 0.293989, 0.284862, 0.293220, 0.284211, 0.292467, /* 9000 */ + 0.283576, 0.291732, 0.282957, 0.291012, 0.282354, 0.290308, 0.281765, 0.289619, 0.281192, 0.288945, + 0.280632, 0.288286, 0.280086, 0.287640, 0.279553, 0.287007, 0.279033, 0.286388, 0.278525, 0.285782, /* 10000 */ + 0.278029, 0.285188, 0.277544, 0.284606, 0.277071, 0.284036, 0.276608, 0.283477, 0.276156, 0.282930, + 0.275714, 0.282393, 0.275281, 0.281867, 0.274858, 0.281351, 0.274444, 0.280845, 0.274039, 0.280349, /* 11000 */ + 0.273643, 0.279862, 0.273255, 0.279384, 0.272875, 0.278915, 0.272503, 0.278455, 0.272139, 0.278004, + 0.271782, 0.277561, 0.271433, 0.277126, 0.271090, 0.276699, 0.270755, 0.276279, 0.270426, 0.275867, /* 12000 */ + 
0.270103, 0.275462, 0.269787, 0.275065, 0.269476, 0.274674, 0.269172, 0.274290, 0.268874, 0.273913, + 0.268581, 0.273542, 0.268293, 0.273178, 0.268011, 0.272820, 0.267734, 0.272467, 0.267462, 0.272121, /* 13000 */ + 0.267195, 0.271780, 0.266933, 0.271445, 0.266676, 0.271116, 0.266423, 0.270791, 0.266174, 0.270472, + 0.265930, 0.270158, 0.265690, 0.269849, 0.265454, 0.269545, 0.265223, 0.269246, 0.264995, 0.268952, /* 14000 */ + 0.264771, 0.268662, 0.264550, 0.268376, 0.264334, 0.268095, 0.264121, 0.267818, 0.263911, 0.267545, + 0.263705, 0.267277, 0.263502, 0.267012, 0.263302, 0.266751, 0.263106, 0.266495, 0.262912, 0.266241, /* 15000 */ + 0.262722, 0.265992, 0.262534, 0.265746, 0.262350, 0.265504, 0.262168, 0.265265, 0.261989, 0.265030, + 0.261813, 0.264798, 0.261640, 0.264569, 0.261469, 0.264343, 0.261300, 0.264121, 0.261134, 0.263901, /* 16000 */ + 0.260971, 0.263685, 0.260809, 0.263471, 0.260651, 0.263261, 0.260494, 0.263053, 0.260340, 0.262848, + 0.260188, 0.262646, 0.260038, 0.262446, 0.259890, 0.262249, 0.259744, 0.262055, 0.259600, 0.261863, /* 17000 */ + 0.259458, 0.261674, 0.259318, 0.261487, 0.259180, 0.261302, 0.259044, 0.261120, 0.258910, 0.260940, + 0.258778, 0.260762, 0.258647, 0.260587, 0.258518, 0.260414, 0.258390, 0.260243, 0.258265, 0.260074, /* 18000 */ + 0.258141, 0.259907, 0.258018, 0.259742, 0.257897, 0.259579, 0.257778, 0.259418, 0.257660, 0.259259, + 0.257544, 0.259102, 0.257429, 0.258947, 0.257315, 0.258793, 0.257203, 0.258642, 0.257093, 0.258492, /* 19000 */ + 0.256983, 0.258344, 0.256875, 0.258197, 0.256768, 0.258052, 0.256663, 0.257909, 0.256559, 0.257768, + 0.256456, 0.257628 /* 20000 */ +}; + +const MATFLOAT *cs_get_gamma(enum cs_gamma_type gamma_type) +{ + return cs_vec_gamma[(gamma_type < EGT_CUSTOM) ? gamma_type : EGT_LINEAR]; +} + +const MATFLOAT *cs_get_color_space(enum cs_color_space_type color_space_type) +{ + return cs_vec_color_space[(color_space_type < ECST_CUSTOM) ? 
color_space_type : ECST_709]; +} + +const MATFLOAT *cs_get_white_point(enum cs_white_point_type white_point_type) +{ + return cs_vec_white_point[(white_point_type < EWPT_NUM) ? white_point_type : EWPT_NONE]; +} + +void cs_set_opts_def(struct s_cs_opts *ptr_cs_opts) +{ + int ni; + + ptr_cs_opts->color_space_type = ECST_709; + ptr_cs_opts->gamma_type = EGT_709; + ptr_cs_opts->mode = 0; + ptr_cs_opts->pq_norm = 0.0; + ptr_cs_opts->luminance_limits[0] = 0.0; + ptr_cs_opts->luminance_limits[1] = 400.0; + for (ni = 0; ni < 8; ni++) + ptr_cs_opts->rgbw_xy[ni] = cs_get_color_space(ECST_709)[ni]; + for (ni = 0; ni < 4; ni++) + ptr_cs_opts->gamma_parm[ni] = cs_get_gamma(EGT_LINEAR)[ni]; +} + +void cs_init(struct s_cs_opts *ptr_cs_opts, struct s_color_space *ptr_color_space) +{ + int ni; + + ptr_color_space->color_space_type = ptr_cs_opts->color_space_type; + ptr_color_space->gamma_type = ptr_cs_opts->gamma_type; + ptr_color_space->mode = ptr_cs_opts->mode; + ptr_color_space->pq_norm = (ptr_cs_opts->pq_norm > 0.0) ? + cs_gamma_pq(ptr_cs_opts->pq_norm / CS_MAX_LUMINANCE, EGD_LIN_2_NONLIN) : 0.0; + + ptr_color_space->luminance_limits[0] = (MATFLOAT)ptr_cs_opts->luminance_limits[0] / CS_MAX_LUMINANCE; + ptr_color_space->luminance_limits[1] = (MATFLOAT)ptr_cs_opts->luminance_limits[1] / CS_MAX_LUMINANCE; + ptr_color_space->luminance_limits[2] = ptr_color_space->luminance_limits[1] - + ptr_color_space->luminance_limits[0]; + + for (int ni = 0; ni < 8; ni++) + ptr_color_space->rgbw_xy[ni] = (ptr_cs_opts->color_space_type < ECST_CUSTOM) ? + cs_get_color_space(ptr_cs_opts->color_space_type)[ni] : ptr_cs_opts->rgbw_xy[ni]; + + for (ni = 0; ni < 4; ni++) + ptr_color_space->gamma_parm[ni] = (ptr_cs_opts->gamma_type < EGT_CUSTOM) ? + cs_get_gamma(ptr_cs_opts->gamma_type)[ni] : (MATFLOAT)ptr_cs_opts->gamma_parm[ni]; + + cs_init_private(ptr_color_space); +} + +void cs_init_private(struct s_color_space *ptr_color_space) +{ + static MATFLOAT mat_xyz2lms[3][3] = { + /* ITU-R BT.2390-4, p36. 
*/
+		{ 0.3592, 0.6976, -0.0358},
+		{-0.1922, 1.1004, 0.0755},
+		{ 0.0070, 0.0749, 0.8434}
+	};
+	static MATFLOAT mat_lms2xyz[3][3] = {
+		/* ITU-R BT.2390-4, p36. */
+		{ 2.0701800566956132, -1.3264568761030211, 0.2066160068478551},
+		{ 0.3649882500326574, 0.6804673628522352, -0.0454217530758532},
+		{-0.0495955422389321, -0.0494211611867575, 1.1879959417328037}
+	};
+	static MATFLOAT mat_lms2itp[3][3] = {
+		/* ITU-R BT.2020, BT.2390-4, p.36 */
+		{ 0.5, 0.5, 0.0},
+		{ 6610.0 / 4096.0, -13613.0 / 4096.0, 7003.0 / 4096.0},
+		{17933.0 / 4096.0, -17390.0 / 4096.0, -543.0 / 4096.0}
+	};
+	static MATFLOAT mat_itp2lms[3][3] = {
+		/* ITU-R BT.2020, BT.2390-4, p.36 */
+		{1.0, 0.00860903703793276, 0.11102962500302596},
+		{1.0, -0.00860903703793276, -0.11102962500302596},
+		{1.0, 0.56003133571067909, -0.32062717498731885}
+	};
+
+	int ni, nj;
+
+	cs_luminance_to_luma_limits(ptr_color_space->luminance_limits, ptr_color_space->luma_limits); /* PQ-encode the luminance limits */
+	mat_3x3_unity(ptr_color_space->mat_chad); /* stays unity unless CS_CHAD_D65 is set in mode */
+
+	/* set white point */
+	ptr_color_space->white_xyz[0] = ptr_color_space->rgbw_xy[6];
+	ptr_color_space->white_xyz[1] = ptr_color_space->rgbw_xy[7];
+	ptr_color_space->white_xyz[2] = 1.0;
+	cs_xyy_to_xyz(ptr_color_space->white_xyz, ptr_color_space->white_xyz); /* in place: xyY with Y = 1 -> XYZ */
+
+	/* generate RGB to XYZ and back matrixes */
+	cs_genmat_rgb_to_xyz(ptr_color_space->rgbw_xy, ptr_color_space->mat_rgb2xyz);
+	if (ptr_color_space->mode & CS_CHAD_D65) {
+		/* Chromatic Adaptation from Color Space to D65 (BT.2020) */
+		MATFLOAT mat_tmp[3][3];
+
+		cs_genmat_chad(&ptr_color_space->rgbw_xy[6], (MATFLOAT *)cs_get_white_point(EWPT_D65),
+			ptr_color_space->mat_chad);
+		mat_copy3x3(ptr_color_space->mat_rgb2xyz, mat_tmp);
+		mat_mul3x3(ptr_color_space->mat_chad, mat_tmp, ptr_color_space->mat_rgb2xyz);
+	}
+	mat_inv3x3(ptr_color_space->mat_rgb2xyz, ptr_color_space->mat_xyz2rgb); /* NOTE(review): return value is not checked */
+
+	for (ni = 0; ni < 3; ni++)
+		for (nj = 0; nj < 3; nj++) { /* copy the constant LMS <-> ITP matrices into the color space */
+			ptr_color_space->mat_lms2itp[ni][nj] = mat_lms2itp[ni][nj];
+
ptr_color_space->mat_itp2lms[ni][nj] = mat_itp2lms[ni][nj];
+		}
+
+	mat_mul3x3(mat_xyz2lms, ptr_color_space->mat_rgb2xyz, ptr_color_space->mat_rgb2lms);
+	mat_mul3x3(ptr_color_space->mat_xyz2rgb, mat_lms2xyz, ptr_color_space->mat_lms2rgb);
+
+	ptr_color_space->cct = cs_xy_to_cct(&ptr_color_space->rgbw_xy[6]);
+
+	ptr_color_space->hlg_system_gamma = cs_hlg_system_gamma(ptr_color_space->luminance_limits[1]);
+	ptr_color_space->hlg_beta = mat_sqrt(3.0 * mat_pow(ptr_color_space->luminance_limits[0] /
+		ptr_color_space->luminance_limits[1], 1.0 / ptr_color_space->hlg_system_gamma));
+}
+
+/*
+ * Copy *ptr_color_space_src into *ptr_color_space_dst field by field.
+ * Every field that cs_init() / cs_init_private() fill in is copied,
+ * including the PQ luma limits and the HLG system gamma / beta, so that the
+ * destination can be used without re-running cs_init_private().
+ */
+void cs_copy(struct s_color_space *ptr_color_space_src, struct s_color_space *ptr_color_space_dst)
+{
+	int ni, nj;
+
+	ptr_color_space_dst->color_space_type = ptr_color_space_src->color_space_type;
+	ptr_color_space_dst->gamma_type = ptr_color_space_src->gamma_type;
+	ptr_color_space_dst->mode = ptr_color_space_src->mode;
+	ptr_color_space_dst->pq_norm = ptr_color_space_src->pq_norm;
+
+	for (ni = 0; ni < 3; ni++)
+		ptr_color_space_dst->luminance_limits[ni] = ptr_color_space_src->luminance_limits[ni];
+	/* luma_limits has three entries: min, max and range (see cs_luminance_to_luma_limits) */
+	for (ni = 0; ni < 3; ni++)
+		ptr_color_space_dst->luma_limits[ni] = ptr_color_space_src->luma_limits[ni];
+	for (ni = 0; ni < 8; ni++)
+		ptr_color_space_dst->rgbw_xy[ni] = ptr_color_space_src->rgbw_xy[ni];
+	for (ni = 0; ni < 4; ni++)
+		ptr_color_space_dst->gamma_parm[ni] = ptr_color_space_src->gamma_parm[ni];
+	for (ni = 0; ni < 3; ni++)
+		for (nj = 0; nj < 3; nj++) {
+			ptr_color_space_dst->mat_rgb2xyz[ni][nj] = ptr_color_space_src->mat_rgb2xyz[ni][nj];
+			ptr_color_space_dst->mat_xyz2rgb[ni][nj] = ptr_color_space_src->mat_xyz2rgb[ni][nj];
+			ptr_color_space_dst->mat_chad[ni][nj] = ptr_color_space_src->mat_chad[ni][nj];
+			ptr_color_space_dst->mat_rgb2lms[ni][nj] = ptr_color_space_src->mat_rgb2lms[ni][nj];
+			ptr_color_space_dst->mat_lms2rgb[ni][nj] = ptr_color_space_src->mat_lms2rgb[ni][nj];
+			ptr_color_space_dst->mat_lms2itp[ni][nj] = ptr_color_space_src->mat_lms2itp[ni][nj];
+			ptr_color_space_dst->mat_itp2lms[ni][nj] = ptr_color_space_src->mat_itp2lms[ni][nj];
+		}
+	for (ni = 0; ni < 3; ni++)
+		ptr_color_space_dst->white_xyz[ni] = ptr_color_space_src->white_xyz[ni];
+	ptr_color_space_dst->cct = ptr_color_space_src->cct;
+	ptr_color_space_dst->hlg_system_gamma = ptr_color_space_src->hlg_system_gamma;
+	ptr_color_space_dst->hlg_beta = ptr_color_space_src->hlg_beta;
+}
+
+/*
+ * PQ-encode the linear luminance limits: luma_limits[0] / [1] are the encoded
+ * minimum / maximum and luma_limits[2] is their difference.
+ */
+void cs_luminance_to_luma_limits(MATFLOAT luminance_limits[2], MATFLOAT luma_limits[3])
+{
+	luma_limits[0] = cs_gamma_pq(luminance_limits[0], EGD_LIN_2_NONLIN);
+	luma_limits[1] = cs_gamma_pq(luminance_limits[1], EGD_LIN_2_NONLIN);
+	luma_limits[2] = luma_limits[1] - luma_limits[0];
+}
+
+/* xyY -> XYZ: X = Y * x / y, Z = Y * (1 - x - y) / y; X and Z are 0 when y <= 0. */
+void cs_xyy_to_xyz(MATFLOAT xyy_inp[3], MATFLOAT xyz_out[3])
+{	/* output may be the same as input */
+	MATFLOAT xyy_tmp[3];
+
+	mat_copy(xyy_inp, xyy_tmp, 3);
+	xyz_out[0] = (xyy_tmp[1] > 0.0) ? xyy_tmp[2] * xyy_tmp[0] / xyy_tmp[1] : 0.0;
+	xyz_out[1] = xyy_tmp[2];
+	xyz_out[2] = (xyy_tmp[1] > 0.0) ? xyy_tmp[2] * (1.0 - xyy_tmp[0] - xyy_tmp[1]) / xyy_tmp[1] : 0.0;
+}
+
+/* XYZ -> xyY: x = X / (X + Y + Z), y = Y / (X + Y + Z); x and y are 0 when the sum is <= 0. */
+void cs_xyz_to_xyy(MATFLOAT xyz_inp[3], MATFLOAT xyy_out[3])
+{	/* output may be the same as input */
+	MATFLOAT sum = xyz_inp[0] + xyz_inp[1] + xyz_inp[2];
+
+	xyy_out[2] = xyz_inp[1];
+	xyy_out[1] = (sum > 0.0) ? xyz_inp[1] / sum : 0.0;
+	xyy_out[0] = (sum > 0.0) ? xyz_inp[0] / sum : 0.0;
+}
+
+/* Scale XYZ so that the three components sum to 1 (chromaticity x, y, z). */
+void cs_xyzc_to_xyz(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3])
+{	/* output may be the same as input */
+	MATFLOAT sum = xyz_inp[0] + xyz_inp[1] + xyz_inp[2];
+
+	xyz_out[0] = (sum > 0.0) ? xyz_inp[0] / sum : 0.0;
+	xyz_out[1] = (sum > 0.0) ? xyz_inp[1] / sum : 0.0;
+	xyz_out[2] = 1.0 - xyz_out[0] - xyz_out[1];
+}
+
+/* Scale XYZ so that the Y component becomes 1; X and Z are 0 when Y <= 0. */
+void cs_xyz_to_xyzc(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3])
+{	/* output may be the same as input */
+	MATFLOAT xyz_tmp[3];
+
+	mat_copy(xyz_inp, xyz_tmp, 3);
+	xyz_out[0] = (xyz_tmp[1] > 0.0) ? xyz_tmp[0] / xyz_tmp[1] : 0.0;
+	xyz_out[1] = 1.0;
+	xyz_out[2] = (xyz_tmp[1] > 0.0) ?
xyz_tmp[2] / xyz_tmp[1] : 0.0;
+}
+
+void cs_rgb_to_itp(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3])
+{	/* output may be the same as input
+	 * RGB -> LMS (mat_rgb2lms), PQ-encode each LMS component, then LMS -> ITP (mat_lms2itp) */
+	MATFLOAT lms[3];
+	int nc;
+
+	mat_eval_3x3(ptr_color_space->mat_rgb2lms, rgb_inp, lms);
+	for (nc = 0; nc < 3; nc++)
+		lms[nc] = cs_gamma_pq(lms[nc], EGD_LIN_2_NONLIN);
+	mat_eval_3x3(ptr_color_space->mat_lms2itp, lms, itp_out);
+}
+
+void cs_itp_to_rgb(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3])
+{	/* output may be the same as input
+	 * inverse of cs_rgb_to_itp: ITP -> LMS (mat_itp2lms), PQ-decode, then LMS -> RGB (mat_lms2rgb) */
+	MATFLOAT lms[3];
+	int nc;
+
+	mat_eval_3x3(ptr_color_space->mat_itp2lms, itp_inp, lms);
+	for (nc = 0; nc < 3; nc++)
+		lms[nc] = cs_gamma_pq(lms[nc], EGD_NONLIN_2_LIN);
+	mat_eval_3x3(ptr_color_space->mat_lms2rgb, lms, rgb_out);
+}
+
+void cs_ich_to_itp(MATFLOAT ich_inp[3], MATFLOAT itp_out[3])
+{	/* output must not be the same as input
+	 * polar (I, C, h) -> cartesian: T = C * cos(h), P = C * sin(h); ich_inp[1] is read again after itp_out[1] is written */
+	itp_out[0] = ich_inp[0];
+	itp_out[1] = ich_inp[1] * mat_cos(ich_inp[2]);
+	itp_out[2] = ich_inp[1] * mat_sin(ich_inp[2]);
+}
+
+void cs_itp_to_ich(MATFLOAT itp_inp[3], MATFLOAT ich_out[3])
+{	/* output must not be the same as input
+	 * cartesian (I, T, P) -> polar: C = mat_radius(P, T), h = mat_angle(P, T) */
+	ich_out[0] = itp_inp[0];
+	ich_out[1] = mat_radius(itp_inp[2], itp_inp[1]);
+	ich_out[2] = mat_angle(itp_inp[2], itp_inp[1]);
+}
+
+void cs_rgb_to_yuv(MATFLOAT rgb_inp[3], MATFLOAT yuv_out[3])
+{	/* RGB to YCbCr709 from Charles Poynton "Digital Video and HD: Algorithms and Interfaces", p.371
+	 * Cb and Cr are offset by 0.5 and the result is clamped to [0.0,1.0] */
+	static MATFLOAT vec_off_inp[3] = { 0.0, 0.0, 0.0 };
+	static MATFLOAT vec_off_out[3] = { 0.0, 0.5, 0.5 };
+	static MATFLOAT mat_rgb_to_yuv[3][3] = {
+		/* R G B */
+		{ 0.2126, 0.7152, 0.0722 },
+		{ -0.11457211, -0.38542789, 0.5 },
+		{ 0.5, -0.45415291, -0.04584709}
+	};
+
+	mat_eval_off_3x3_off(vec_off_inp, mat_rgb_to_yuv, vec_off_out, rgb_inp, yuv_out);
+	cs_clamp_rgb(yuv_out, 0.0, 1.0);
+}
+
+void cs_yuv_to_rgb(MATFLOAT yuv_inp[3], MATFLOAT rgb_out[3])
+{	/* YCbCr709 to RGB from Charles Poynton "Digital Video and HD: Algorithms and Interfaces", p.371
*/
+	static MATFLOAT vec_off_inp[3] = { 0.0, -0.5, -0.5 }; /* removes the 0.5 offset from Cb and Cr */
+	static MATFLOAT vec_off_out[3] = { 0.0, 0.0, 0.0 };
+	static MATFLOAT mat_yuv_to_rgb[3][3] = {
+		/* Y Cb Cr */
+		{ 1.0, 0.0, 1.5748 },
+		{ 1.0, -0.187324273, -0.468124273 },
+		{ 1.0, 1.8556, 0.0 }
+	};
+
+	mat_eval_off_3x3_off(vec_off_inp, mat_yuv_to_rgb, vec_off_out, yuv_inp, rgb_out);
+	cs_clamp_rgb(rgb_out, 0.0, 1.0); /* result is clamped to [0.0,1.0] */
+}
+
+void cs_nlin_to_lin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{	/* Non-linear -> linear for an RGB triple: HLG goes through cs_hlg_eotf() on the whole
+	 * vector, every other gamma type is converted per component by cs_nlin_to_lin() */
+	if (ptr_color_space->gamma_type == EGT_HLG)
+		cs_hlg_eotf(rgb_inp, rgb_out, ptr_color_space->luminance_limits,
+			ptr_color_space->hlg_system_gamma, ptr_color_space->hlg_beta);
+	else
+		for (int nc = 0; nc < 3; nc++)
+			rgb_out[nc] = cs_nlin_to_lin(ptr_color_space, rgb_inp[nc]);
+}
+
+MATFLOAT cs_nlin_to_lin(struct s_color_space *ptr_color_space, MATFLOAT val_inp)
+{	/* Non-linear -> linear for one component; the result is clamped to [0.0,1.0] on the SDR
+	 * path, while on the PQ path the clamp is applied before the inverse gamma */
+	MATFLOAT val_out;
+
+	if (ptr_color_space->gamma_type == EGT_PQ) {
+		/* HDR PQ encoded signal is normalized to a range [0.0,1.0],
+			where 0.0 mapped to 0.0 and 1.0 mapped to the PQ code value stored in pq_norm */
+		if (ptr_color_space->pq_norm > 0.0)
+			val_out = mat_denorm(val_inp, 0.0, ptr_color_space->pq_norm);
+		else
+			val_out = val_inp;
+		val_out = mat_clamp(val_out, 0.0, 1.0);
+		val_out = cs_gamma(val_out, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN);
+	}
+	else {
+		/* SDR encoded signal is normalized to a range [0.0,1.0],
+			where 0.0 mapped to Black (0,0,0) and 1.0 mapped to White (1,1,1) */
+		val_out = cs_gamma(val_inp, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN);
+		val_out = mat_denorm(val_out, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+		val_out = mat_clamp(val_out, 0.0, 1.0);
+	}
+
+	return val_out;
+}
+
+void cs_lin_to_nlin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{	/* Linear -> non-linear for an RGB triple: HLG uses cs_hlg_oetf(), other types cs_lin_to_nlin() per component */
+	if (ptr_color_space->gamma_type == EGT_HLG)
+		cs_hlg_oetf(rgb_inp, rgb_out, ptr_color_space->luminance_limits[1], ptr_color_space->hlg_system_gamma);
+	else
+		for (int nc = 0;
nc < 3; nc++)
+			rgb_out[nc] = cs_lin_to_nlin(ptr_color_space, rgb_inp[nc]);
+}
+
+MATFLOAT cs_lin_to_nlin(struct s_color_space *ptr_color_space, MATFLOAT val_inp)
+{	/* Linear -> non-linear for one component; mirrors cs_nlin_to_lin() with the steps in reverse order */
+	MATFLOAT val_out;
+
+	if (ptr_color_space->gamma_type == EGT_PQ) {
+		/* HDR PQ encoded signal is normalized to a range [0.0,1.0],
+			where 0.0 mapped to 0.0 and 1.0 mapped to the PQ code value stored in pq_norm */
+		val_out = cs_gamma(val_inp, ptr_color_space->gamma_parm, EGD_LIN_2_NONLIN);
+		if (ptr_color_space->pq_norm > 0.0)
+			val_out = mat_norm(val_out, 0.0, ptr_color_space->pq_norm);
+		val_out = mat_clamp(val_out, 0.0, 1.0);
+	}
+	else {
+		/* SDR encoded signal is normalized to a range [0.0,1.0],
+			where 0.0 mapped to Black (0,0,0) and 1.0 mapped to White (1,1,1) */
+		val_out = mat_norm(val_inp, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+		val_out = mat_clamp(val_out, 0.0, 1.0);
+		val_out = cs_gamma(val_out, ptr_color_space->gamma_parm, EGD_LIN_2_NONLIN);
+	}
+
+	return val_out;
+}
+
+int cs_genmat_rgb_to_xyz(MATFLOAT rgbw[8], MATFLOAT mat_rgb2xyz[3][3])
+{	/* Build the RGB -> XYZ matrix from the primaries rgbw[0..5] and white point rgbw[6..7] (xy pairs).
+	 * Returns the result of mat_inv3x3() on the primaries matrix.
+	 * NOTE(review): divides by each primary's y coordinate without a zero check */
+	MATFLOAT white_xyz[3] = { rgbw[6], rgbw[7], 1.0 };
+	MATFLOAT mat[3][3], mat_inv[3][3], white_k[3];
+	int ni, nc;
+	int rc;
+
+	for (ni = 0; ni < 3; ni++) { /* X, Y, Z */
+		mat[0][ni] = rgbw[2 * ni + 0] / rgbw[2 * ni + 1];
+		mat[1][ni] = 1.0;
+		mat[2][ni] = (1.0 - rgbw[2 * ni + 0] - rgbw[2 * ni + 1]) / rgbw[2 * ni + 1];
+	}
+	rc = mat_inv3x3(mat, mat_inv);
+	cs_xyy_to_xyz(white_xyz, white_xyz);
+	mat_eval_3x3(mat_inv, white_xyz, white_k); /* per-primary scale so that RGB (1,1,1) maps to the white point */
+	for (ni = 0; ni < 3; ni++)
+		for (nc = 0; nc < 3; nc++)
+			mat_rgb2xyz[nc][ni] = white_k[ni] * mat[nc][ni];
+
+	return rc;
+}
+
+int cs_genmat_xyz_to_rgb(MATFLOAT rgbw_xy[8], MATFLOAT mat_xyz2rgb[3][3])
+{	/* Inverse of the RGB -> XYZ matrix; returns the result of the final mat_inv3x3().
+	 * NOTE(review): the return value of cs_genmat_rgb_to_xyz() is discarded */
+	MATFLOAT mat_rgb2xyz[3][3];
+
+	cs_genmat_rgb_to_xyz(rgbw_xy, mat_rgb2xyz);
+	return mat_inv3x3(mat_rgb2xyz, mat_xyz2rgb);
+}
+
+int cs_genmat_rgb_to_rgb(MATFLOAT rgbw_xy_src[8], MATFLOAT rgbw_xy_dst[8], MATFLOAT mat_rgb2rgb[3][3], int en_chad)
+{	/* Source RGB -> destination RGB matrix, optionally with chromatic adaptation between the two white points */
+	MATFLOAT mat_rgb2xyz[3][3],
mat_xyz2rgb[3][3], mat_chad[3][3];
+	int rc;
+
+	cs_genmat_rgb_to_xyz(rgbw_xy_src, mat_rgb2xyz); /* NOTE(review): return value is not checked */
+	rc = cs_genmat_xyz_to_rgb(rgbw_xy_dst, mat_xyz2rgb);
+
+	if (en_chad) { /* Chromatic Adaptation */
+		MATFLOAT mat_tmp[3][3];
+
+		cs_genmat_chad(&rgbw_xy_src[6], &rgbw_xy_dst[6], mat_chad);
+		mat_copy3x3(mat_rgb2xyz, mat_tmp);
+		mat_mul3x3(mat_chad, mat_tmp, mat_rgb2xyz);
+	}
+
+	mat_mul3x3(mat_xyz2rgb, mat_rgb2xyz, mat_rgb2rgb);
+
+	return rc;
+}
+
+int cs_genmat_chad(MATFLOAT white_xy_src[2], MATFLOAT white_xy_dst[2], MATFLOAT mat_chad[3][3])
+{	/* Chromatic adaptation matrix from the source to the destination white point (xy), built as
+	 * mat_bradford_inv * diag(dst / src in Bradford space) * mat_bradford. Always returns 0 */
+	static MATFLOAT mat_bradford[3][3] = {
+		/* Bradford matrix */
+		{ 0.8951000, 0.2664000, -0.1614000},
+		{-0.7502000, 1.7135000, 0.0367000},
+		{ 0.0389000, -0.0685000, 1.0296000}
+	};
+
+	static MATFLOAT mat_bradford_inv[3][3] = {
+		/* Bradford inverse matrix */
+		{ 0.9869929, -0.1470543, 0.1599627},
+		{ 0.4323053, 0.5183603, 0.0492912},
+		{-0.0085287, 0.0400428, 0.9684867}
+	};
+
+#if 0 /* Not in use */
+	static MATFLOAT mat_von_kries[3][3] = {
+		/* Von Kries matrix */
+		{ 0.4002400, 0.7076000, -0.0808100},
+		{-0.2263000, 1.1653200, 0.0457000},
+		{ 0.0000000, 0.0000000, 0.9182200}
+	};
+
+	static MATFLOAT mat_von_kries_inv[3][3] = {
+		/* Von Kries inverse matrix */
+		{1.8599364, -1.1293816, 0.2198974},
+		{0.3611914, 0.6388125, -0.0000064},
+		{0.0000000, 0.0000000, 1.0890636}
+	};
+#endif
+
+	MATFLOAT vec_white_xyz_src[3] = { white_xy_src[0], white_xy_src[1], 1.0 };
+	MATFLOAT vec_white_xyz_dst[3] = { white_xy_dst[0], white_xy_dst[1], 1.0 };
+	MATFLOAT vec_lms[3][3];
+	MATFLOAT rgb_src[3], rgb_dst[3];
+	MATFLOAT mat_tmp[3][3];
+	int nc;
+
+	/* convert to XYZ */
+	cs_xyy_to_xyz(vec_white_xyz_src, vec_white_xyz_src);
+	cs_xyy_to_xyz(vec_white_xyz_dst, vec_white_xyz_dst);
+	/* generate scales */
+	mat_3x3_unity(vec_lms);
+	mat_eval_3x3(mat_bradford, vec_white_xyz_src, rgb_src);
+	mat_eval_3x3(mat_bradford, vec_white_xyz_dst, rgb_dst);
+	for (nc = 0; nc < 3; nc++)
+		vec_lms[nc][nc] = rgb_dst[nc] / rgb_src[nc];
+	/* normalize */
+	mat_mul3x3(vec_lms, mat_bradford, mat_tmp);
+	mat_mul3x3(mat_bradford_inv, mat_tmp, mat_chad);
+
+	return 0;
+}
+
+MATFLOAT cs_gamma(MATFLOAT val, MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir)
+{	/* Apply the transfer function described by gamma_parm = {c1, c2, c3, c4} in direction gamma_dir.
+	 * c1 == 0.0 selects PQ and c1 == 0.5 selects HLG; otherwise the piecewise power law below is used.
+	 * NOTE(review): when c3 == 0 (pure power rows) a negative input in the NONLIN_2_LIN direction divides by zero */
+	MATFLOAT val_out;
+
+	if (gamma_parm[0] == 0.0)
+		val_out = cs_gamma_pq(val, gamma_dir);
+	else if (gamma_parm[0] == 0.5)
+		val_out = cs_gamma_hlg(val, gamma_dir);
+	else {
+		MATFLOAT c1 = gamma_parm[0];
+		MATFLOAT c2 = gamma_parm[1];
+		MATFLOAT c3 = gamma_parm[2];
+		MATFLOAT c4 = gamma_parm[3];
+
+		if (gamma_dir == EGD_LIN_2_NONLIN)
+			val_out = ((val < c4) ? val * c3 : c1 * mat_pow(val, c2) + 1.0 - c1);
+		else
+			val_out = (val < c4 * c3) ? val / c3 : mat_pow((val + c1 - 1.0) / c1, 1.0 / c2);
+	}
+
+	return val_out;
+}
+
+/* R_REC-BT.2100-2-2 Table 4 */
+/* input must be in a range [0,1] normalized to [0,10000]cd/m^2 in linear or non-linear space */
+/* output must be in a range [0,1] normalized to [0,10000]cd/m^2 in linear or non-linear space */
+MATFLOAT cs_gamma_pq(MATFLOAT val, enum cs_gamma_dir gamma_dir)
+{	/* The curve is applied to |val| and the sign of val is restored on the result */
+	static const MATFLOAT s_m1 = 0.1593017578125;
+	static const MATFLOAT s_m2 = 78.84375;
+	static const MATFLOAT s_c1 = 0.8359375;
+	static const MATFLOAT s_c2 = 18.8515625;
+	static const MATFLOAT s_c3 = 18.6875;
+
+	MATFLOAT sign = (val < 0.0) ?
-1.0 : 1.0;
+	MATFLOAT val_out = MAT_ABS(val);
+	MATFLOAT t1, t2, t;
+
+	if (gamma_dir == EGD_LIN_2_NONLIN) { /* linear to PQ */
+		MATFLOAT x = mat_pow(val_out, s_m1);
+
+		t1 = (s_c2 * x) + s_c1;
+		t2 = 1.0 + (s_c3 * x);
+		t = t1 / t2;
+		val_out = mat_pow(t, s_m2);
+	} else { /* PQ to linear */
+		MATFLOAT np = mat_pow(val_out, 1.0 / s_m2);
+
+		t1 = np - s_c1;
+		t1 = MAT_MAX(t1, 0.0); /* numerator is floored at 0 so the final power never sees a negative base from it */
+		t2 = s_c2 - (s_c3 * np);
+		t = t1 / t2;
+		val_out = mat_pow(t, 1.0 / s_m1);
+	}
+	val_out *= sign;
+
+	return val_out;
+}
+
+/* EOTF 1886 */
+/* input must be in a range [0,1] normalized to [Lb,Lw]cd/m^2 in non-linear space */
+/* output must be in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* lb in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* lw in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+MATFLOAT cs_gamma_1886(MATFLOAT val, MATFLOAT lb, MATFLOAT lw, MATFLOAT gamma)
+{	/* L = a * max(V + b, 0)^gamma with a = (lw^(1/gamma) - lb^(1/gamma))^gamma and
+	 * b = lb^(1/gamma) / (lw^(1/gamma) - lb^(1/gamma)); NOTE(review): lw == lb divides by zero */
+	MATFLOAT lb_nl = mat_pow(lb, 1.0 / gamma);
+	MATFLOAT lw_nl = mat_pow(lw, 1.0 / gamma);
+	MATFLOAT a = mat_pow(lw_nl - lb_nl, gamma);
+	MATFLOAT b = lb_nl / (lw_nl - lb_nl);
+
+	return a * mat_pow(MAT_MAX(val + b, 0.0), gamma);
+}
+
+/* rgb_inp[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+void cs_pq_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{	/* Per component: scale by 59.5208, apply the 709 OETF, then the 1886 EOTF (power 2.4, / 100), clamp to [0,1] */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++) {
+		MATFLOAT e = rgb_inp[nc] * 59.5208;
+		MATFLOAT e709 = (e <= 0.018) ?
4.5 * e : 1.099 * mat_pow(e, 0.45) - 0.099; /* OETF 709 */
+		MATFLOAT e1886 = mat_pow(e709, 2.4) / 100.0; /* EOTF 1886 */
+
+		rgb_out[nc] = MAT_CLAMP(e1886, 0.0, 1.0);
+	}
+}
+
+/* BT.2390 display referred */
+/* rgb_inp[] in a range [0,1] normalized to [0,100]cd/m^2 in non-linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in non-linear space */
+void cs_sdr_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020)
+{	/* en_709_2020 is only tested for non-zero; when set, cs_mat_709_2020 is applied in linear light.
+	 * NOTE(review): it is declared MATFLOAT although it is used as a flag */
+	MATFLOAT sdr_lb = 0.0;
+	MATFLOAT sdr_lw = 100.0 / CS_MAX_LUMINANCE;
+	MATFLOAT sdr_gamma = 2.4;
+	MATFLOAT scale = 2.0;
+	MATFLOAT rgb_lin[3];
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb_lin[nc] = cs_gamma_1886(rgb_inp[nc], sdr_lb, sdr_lw, sdr_gamma); /* [0,10000]cd/m^2 */
+
+	if (en_709_2020) {
+		MATFLOAT rgb_tmp[3];
+
+		mat_copy(rgb_lin, rgb_tmp, 3);
+		mat_eval_3x3(cs_mat_709_2020, rgb_tmp, rgb_lin); /* [0,10000]cd/m^2 */
+	}
+
+	for (nc = 0; nc < 3; nc++)
+		rgb_lin[nc] = rgb_lin[nc] * scale; /* scale to 200cd/m^2 */
+
+	cs_gamma_rgb(rgb_lin, rgb_out, (MATFLOAT *)cs_get_gamma(EGT_PQ), EGD_LIN_2_NONLIN); /* [0,10000]cd/m^2 */
+}
+
+void cs_gamma_rgb(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir)
+{	/* output may be the same as input
+	 * applies cs_gamma() to each of the three components */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb_out[nc] = cs_gamma(rgb_inp[nc], gamma_parm, gamma_dir);
+}
+
+int cs_min_rgb(MATFLOAT rgb[3], MATFLOAT val_min)
+{	/* Raise every component to at least val_min (in place); returns 1 if any component was changed, else 0 */
+	int is_clip = 0;
+	int nc;
+
+	for (nc = 0; nc < 3; nc++) {
+		MATFLOAT value = rgb[nc];
+
+		rgb[nc] = MAT_MAX(value, val_min);
+		is_clip |= (rgb[nc] == value) ? 0 : 1;
+	}
+
+	return is_clip;
+}
+
+int cs_max_rgb(MATFLOAT rgb[3], MATFLOAT val_max)
+{	/* Lower every component to at most val_max (in place); returns 1 if any component was changed, else 0 */
+	int is_clip = 0;
+	int nc;
+
+	for (nc = 0; nc < 3; nc++) {
+		MATFLOAT value = rgb[nc];
+
+		rgb[nc] = MAT_MIN(value, val_max);
+		is_clip |= (rgb[nc] == value) ?
0 : 1;
+	}
+
+	return is_clip;
+}
+
+int cs_is_valid_ic(struct s_color_space *ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2])
+{	/* Validity test for an (I, C) point at a fixed hue: hue_sin_cos[0] is used as sin(h), hue_sin_cos[1] as cos(h) */
+	MATFLOAT pnt_itp[3];
+
+	pnt_itp[0] = pnt_ic[0];
+	pnt_itp[1] = pnt_ic[1] * hue_sin_cos[1];
+	pnt_itp[2] = pnt_ic[1] * hue_sin_cos[0];
+
+	return cs_is_valid_itp(ptr_color_space, pnt_itp);
+}
+
+int cs_is_valid_itp(struct s_color_space *ptr_color_space, MATFLOAT itp[3])
+{	/* Converts ITP to RGB and checks it against the color space's luminance limits [0] .. [1] */
+	MATFLOAT rgb[3];
+
+	cs_itp_to_rgb(ptr_color_space, itp, rgb);
+
+	return cs_is_valid_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[1]);
+}
+
+int cs_is_valid_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max)
+{	/* Thin wrapper: returns mat_is_valid_vec() for the three components and the given limits */
+	return mat_is_valid_vec(rgb, 3, val_min, val_max);
+}
+
+int cs_clip_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max)
+{	/* Clamps rgb in place when cs_is_valid_rgb() reports 0. Returns 1 in that case and 0 otherwise */
+	int is_clip = cs_is_valid_rgb(rgb, val_min, val_max); /* despite the name this holds the "is valid" flag */
+
+	if (is_clip == 0)
+		cs_clamp_rgb(rgb, val_min, val_max);
+
+	return is_clip ? 0 : 1;
+}
+
+void cs_clamp_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max)
+{	/* In place: mat_clamp() on each component */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb[nc] = mat_clamp(rgb[nc], val_min, val_max);
+}
+
+void cs_norm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng)
+{	/* In place: mat_norm() on each component; val_rng is the range, not the maximum */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb[nc] = mat_norm(rgb[nc], val_min, val_rng);
+}
+
+void cs_denorm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng)
+{	/* In place: mat_denorm() on each component; val_rng is the range, not the maximum */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb[nc] = mat_denorm(rgb[nc], val_min, val_rng);
+}
+
+void cs_int2flt_rgb(int rgb_inp[3], MATFLOAT rgb_out[3], int val_max)
+{	/* Integer -> float per component via mat_int2flt() */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb_out[nc] = mat_int2flt(rgb_inp[nc], val_max);
+}
+
+void cs_flt2int_rgb(MATFLOAT rgb_inp[3], int rgb_out[3], int val_max)
+{	/* Float -> integer per component via mat_flt2int() */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb_out[nc] = mat_flt2int(rgb_inp[nc], val_max);
+}
+
+
+void cs_short2flt_rgb(unsigned short rgb_inp[3], MATFLOAT rgb_out[3], int val_max)
+{	/* Same as cs_int2flt_rgb() for unsigned short input */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb_out[nc] = mat_int2flt(rgb_inp[nc], val_max);
+}
+
+void cs_flt2short_rgb(MATFLOAT rgb_inp[3], unsigned short rgb_out[3], int val_max)
+{	/* Same as cs_flt2int_rgb() for unsigned short output.
+	 * NOTE(review): the mat_flt2int() result is narrowed to unsigned short - presumably val_max <= 65535, confirm */
+	int nc;
+
+	for (nc = 0; nc < 3; nc++)
+		rgb_out[nc] = mat_flt2int(rgb_inp[nc], val_max);
+}
+
+void cs_genprim_itp(struct s_color_space *ptr_color_space, int num_prim,
+	MATFLOAT *ptr_prim_rgb, MATFLOAT *ptr_prim_ich)
+{	/* Convert num_prim RGB triples to ICh. Input is interleaved (3 values per primary);
+	 * output is planar: component nc of primary nk is stored at [num_prim * nc + nk] */
+	int nk, nc;
+
+	for (nk = 0; nk < num_prim; nk++) {
+		MATFLOAT rgb[3], vec_itp[3], vec_ich[3];
+
+		mat_copy(&ptr_prim_rgb[3 * nk], rgb, 3);
+		cs_denorm_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+		cs_rgb_to_itp(ptr_color_space, rgb, vec_itp);
+		cs_itp_to_ich(vec_itp, vec_ich);
+		for (nc = 0; nc < 3; nc++)
+			ptr_prim_ich[num_prim * nc + nk] = vec_ich[nc];
+	}
+}
+
+MATFLOAT cs_soft_clip(MATFLOAT val, MATFLOAT limits_src[3], MATFLOAT limits_dst[3])
+{	/* Based on BT.2390 - Src must be wider than Dst
+	 * limits_*[0] / [1] are min / max and limits_src[2] is the source range; only [0] and [1] of limits_dst are read */
+	const MATFLOAT epsilon = 0.000001;
+	MATFLOAT val_min = (limits_dst[0] - limits_src[0]) / (limits_src[1] - limits_src[0]);
+	MATFLOAT val_max = (limits_dst[1] - limits_src[0]) / (limits_src[1] - limits_src[0]);
+	MATFLOAT ks = (1.5 * val_max) - 0.5; /* knee start */
+	MATFLOAT e0, e1, e2, e3, e4;
+
+	/* Input value must be normalized to [0.0,1.0] */
+	e0 = val;
+	e1 = mat_norm(e0, limits_src[0], limits_src[2]);
+	e1 = mat_clamp(e1, 0.0, 1.0);
+
+	if (e1 < ks)
+		e2 = e1;
+	else {
+		MATFLOAT t = ((1.0 - ks) <= epsilon) ?
(e1 - ks) : ((e1 - ks) / (1.0 - ks)); /* epsilon guard avoids dividing by ~0 when ks is ~1 */
+		MATFLOAT t2 = t * t;
+		MATFLOAT t3 = t2 * t;
+
+		e2 = (((2.0 * t3) - (3.0 * t2) + 1.0) * ks) + ((t3 - (2.0 * t2) + t) * (1.0 - ks)) + (((-2.0 * t3) +
+			(3.0 * t2)) * val_max); /* Hermite spline above the knee */
+	}
+	e3 = e2 + val_min * mat_pow((1.0 - e2), 4.0); /* black level lift */
+
+	/* Output value must be denormalized back to [limits_src[0], limits_src[1]] */
+	e4 = mat_denorm(e3, limits_src[0], limits_src[2]);
+	e4 = mat_clamp(e4, limits_src[0], limits_src[1]);
+
+	return e4;
+}
+
+MATFLOAT cs_gamma_to_gamma(MATFLOAT val, enum cs_gamma_type gamma_type_src, enum cs_gamma_type gamma_type_dst,
+	MATFLOAT luminance_limits_dst[3], MATFLOAT luma_limits_src[3], MATFLOAT luma_limits_dst[3],
+	MATFLOAT(*func_pq_to_pq)(MATFLOAT), int en_norm, int en_soft_clip)
+{	/* Re-encode one value from gamma_type_src to gamma_type_dst through a PQ-domain transform.
+	 * func_pq_to_pq is called unconditionally, so it must not be NULL */
+	MATFLOAT val_out = cs_gamma(val, (MATFLOAT *)cs_get_gamma(gamma_type_src), EGD_NONLIN_2_LIN); /* degamma */
+
+	if (en_norm)
+		val_out = mat_denorm(val_out, luminance_limits_dst[0], luminance_limits_dst[2]);/* denorm */
+	val_out = mat_clamp(val_out, luminance_limits_dst[0], luminance_limits_dst[1]); /* clamp */
+	val_out = cs_gamma_pq(val_out, EGD_LIN_2_NONLIN); /* LIN2PQ */
+	val_out = func_pq_to_pq(val_out); /* PQ2PQ transform */
+	if (en_soft_clip)
+		val_out = cs_soft_clip(val_out, luma_limits_src, luma_limits_dst); /* SoftClip */
+	val_out = cs_gamma_pq(val_out, EGD_NONLIN_2_LIN); /* PQ2LIN */
+	if (en_norm)
+		val_out = mat_norm(val_out, luminance_limits_dst[0], luminance_limits_dst[2]); /* norm */
+	val_out = mat_clamp(val_out, 0.0, 1.0); /* clamp */
+	val_out = cs_gamma(val_out, (MATFLOAT *)cs_get_gamma(gamma_type_dst), EGD_LIN_2_NONLIN); /* regamma */
+
+	return val_out;
+}
+
+int cs_xy_to_cct(MATFLOAT xy[2])
+{	/* McCamy's polynomial formula for CCT
+	 * NOTE(review): divides by (xy[1] - 0.1858) without a zero check */
+	MATFLOAT val = (xy[0] - 0.3320) / (xy[1] - 0.1858);
+	MATFLOAT val2 = val * val;
+	MATFLOAT val3 = val * val2;
+	MATFLOAT cct = -449.0 * val3 + 3525.0 * val2 - 6823.0 * val + 5520.33;
+
+	return MAT_ROUND(cct);
+}
+
+void cs_cct_to_xy(int cct, MATFLOAT xy[2])
+{
+
int val = MAT_CLAMP(cct, CS_CCT_MIN, CS_CCT_MAX) - CS_CCT_MIN; /* offset of the clamped CCT from the first table entry */ + int vec_ind[2]; + MATFLOAT phase; + MATFLOAT vec_x[2], vec_y[2]; + + vec_ind[0] = val / CS_CCT_INC; /* lower table index */ + vec_ind[1] = MAT_MIN(vec_ind[0] + 1, CS_CCT_SIZE - 1); /* upper table index, limited to the last entry */ + phase = (MATFLOAT)(val - vec_ind[0] * CS_CCT_INC) / (MATFLOAT)CS_CCT_INC; /* fractional position between the two entries */ + + vec_x[0] = cs_vec_cct_xy[2 * vec_ind[0] + 0]; /* table is read as interleaved x,y pairs */ + vec_x[1] = cs_vec_cct_xy[2 * vec_ind[1] + 0]; + vec_y[0] = cs_vec_cct_xy[2 * vec_ind[0] + 1]; + vec_y[1] = cs_vec_cct_xy[2 * vec_ind[1] + 1]; + + xy[0] = mat_linear(vec_x, phase); + xy[1] = mat_linear(vec_y, phase); +} + +void cs_csc(struct s_color_space *ptr_cs_src, struct s_color_space *ptr_cs_dst, + MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], int en_chad) +{ /* Converts rgb_inp from the source to the destination color space: degamma, 3x3 remap, clamp to [0,1], regamma; the remap matrix is regenerated on every call */ + MATFLOAT rgb_tmp[3]; + MATFLOAT mat_remap[3][3]; + + cs_genmat_rgb_to_rgb(ptr_cs_src->rgbw_xy, ptr_cs_dst->rgbw_xy, mat_remap, en_chad); + + cs_nlin_to_lin_rgb(ptr_cs_src, rgb_inp, rgb_tmp); + mat_eval_3x3(mat_remap, rgb_tmp, rgb_out); + cs_clamp_rgb(rgb_out, 0.0, 1.0); + cs_lin_to_nlin_rgb(ptr_cs_dst, rgb_out, rgb_out); +} + +int cs_is_space(struct s_color_space *ptr_color_space, + enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type) +{ /* Returns 1 when both the color space type and the gamma type match, 0 otherwise */ + return ((ptr_color_space->color_space_type == color_space_type) && + (ptr_color_space->gamma_type == gamma_type)) ? 
1 : 0; +} + +void cs_init_type(MATFLOAT luminance_limits[2], + enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type, + struct s_color_space *ptr_color_space) +{ /* Fills a zero-initialized options struct with the requested types and luminance min/max (mode 0, pq_norm 0.0) and initializes ptr_color_space through cs_init() */ + struct s_cs_opts cs_opts = {0}; + + cs_opts.color_space_type = color_space_type; + cs_opts.gamma_type = gamma_type; + cs_opts.mode = 0; + cs_opts.pq_norm = 0.0; + cs_opts.luminance_limits[0] = luminance_limits[0]; + cs_opts.luminance_limits[1] = luminance_limits[1]; + + cs_init(&cs_opts, ptr_color_space); +} + +void cs_init_BT709(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space) +{ /* ECST_709 primaries with EGT_709 gamma */ + cs_init_type(luminance_limits, ECST_709, EGT_709, ptr_color_space); +} + +void cs_init_BT2100(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space) +{ /* ECST_BT2020 primaries with EGT_PQ gamma */ + cs_init_type(luminance_limits, ECST_BT2020, EGT_PQ, ptr_color_space); +} + +void cs_rgb_to_ycbcr2020(MATFLOAT rgb_inp[3], MATFLOAT ycbcr_out[3]) +{ /* ITU-R BT.2020 */ + ycbcr_out[0] = 0.2627 * rgb_inp[0] + 0.678 * rgb_inp[1] + 0.0593 * rgb_inp[2]; + ycbcr_out[1] = (rgb_inp[2] - ycbcr_out[0]) / 1.8814; /* Cb from B - Y */ + ycbcr_out[2] = (rgb_inp[0] - ycbcr_out[0]) / 1.4746; /* Cr from R - Y */ +} + +/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */ +MATFLOAT cs_ootf_gamma_peak(MATFLOAT gamma, MATFLOAT luminance_peak) +{ /* gamma correction for peak luminance of the display */ + return gamma * mat_pow(1.111, mat_log2(luminance_peak / 0.1)); /* normalized to 1000 nits */ +} + +/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +/* luminance_ambient in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - ambient light in linear space */ +MATFLOAT cs_ootf_gamma_amb(MATFLOAT gamma, MATFLOAT luminance_ambient) +{ /* gamma correction for ambient light */ + return gamma * mat_pow(0.98, mat_log2(luminance_ambient / 0.0005)); /* normalized to 5 nits */ +} + +MATFLOAT 
cs_gamma_adjust_sdr(MATFLOAT gamma, MATFLOAT luminance_peak) +{ + /* gamma correction for peak luminance of the display */ + if (luminance_peak <= 0.1) + gamma = gamma * mat_pow(1.111, mat_log2(luminance_peak / 0.01)); /* NOTE(review): reference level here is 0.01 while the other branches use 0.1 - confirm this is intended */ + else if ((luminance_peak > 0.1) && (luminance_peak < 0.2)) + gamma = gamma + ((luminance_peak > 0.1) ? 0.42 * mat_log10(luminance_peak / 0.1) : 0.0); /* inner test is always true in this branch */ + else + gamma = gamma * mat_pow(1.111, mat_log2(luminance_peak / 0.1)); + + return gamma; +} + +void cs_chad_gains(MATFLOAT rgbw_xy[8], MATFLOAT w_xy[2], MATFLOAT rgb_gain[3]) +{ /* Computes per-channel gains that map RGB white through the white point adaptation towards w_xy, scaled so the largest gain is 1.0 */ + MATFLOAT rgb_white[3] = { 1.0, 1.0, 1.0 }; + MATFLOAT max_gain = 0.0; + MATFLOAT mat_rgb2xyz[3][3], mat_xyz2rgb[3][3]; + MATFLOAT mat_chad[3][3]; + MATFLOAT xyz_inp[3], xyz_out[3]; + int nc; + + /* generate RGB to XYZ and back transformation matrices */ + cs_genmat_rgb_to_xyz(rgbw_xy, mat_rgb2xyz); + mat_inv3x3(mat_rgb2xyz, mat_xyz2rgb); + /* generate matrix of white point conversion from display to target */ + cs_genmat_chad(&rgbw_xy[6], w_xy, mat_chad); /* rgbw_xy[6..7] is passed as the source white point */ + /* map white to gains */ + mat_eval_3x3(mat_rgb2xyz, rgb_white, xyz_inp); + mat_eval_3x3(mat_chad, xyz_inp, xyz_out); + mat_eval_3x3(mat_xyz2rgb, xyz_out, rgb_gain); + /* normalize gains to max */ + for (nc = 0; nc < 3; nc++) + max_gain = MAT_MAX(max_gain, rgb_gain[nc]); + for (nc = 0; nc < 3; nc++) + rgb_gain[nc] = rgb_gain[nc] / max_gain; /* max_gain stays 0.0 when no gain is positive; the division is then by zero */ +} + +void cs_genmat_cct(struct s_color_space *ptr_cs, int cct_shift, int norm, MATFLOAT mat_cct[3][3]) +{ /* Builds mat_cct = xyz2rgb * chad * rgb2xyz for the white point at (ptr_cs->cct + cct_shift); with norm set, the matrix is divided by the largest channel it produces for RGB white */ + MATFLOAT xy[2]; + MATFLOAT mat_chad[3][3]; + MATFLOAT mat_tmp[3][3]; + + cs_cct_to_xy(ptr_cs->cct + cct_shift, xy); + cs_genmat_chad(&ptr_cs->rgbw_xy[6], xy, mat_chad); + mat_mul3x3(mat_chad, ptr_cs->mat_rgb2xyz, mat_tmp); + mat_mul3x3(ptr_cs->mat_xyz2rgb, mat_tmp, mat_cct); + + if (norm) { + MATFLOAT rgb_white[3] = { 1.0, 1.0, 1.0 }; + MATFLOAT max_gain = 0.0; + MATFLOAT rgb_gain[3]; + int nc, ni; + + mat_eval_3x3(mat_cct, rgb_white, rgb_gain); + for (nc = 0; nc < 3; nc++) + max_gain = MAT_MAX(max_gain, rgb_gain[nc]); + for 
(nc = 0; nc < 3; nc++) + for (ni = 0; ni < 3; ni++) + mat_cct[nc][ni] = mat_cct[nc][ni] / max_gain; + } +} + +int cs_rgb_to_vsh(MATFLOAT rgb[3], MATFLOAT vsh[3]) +{ /* RGB to value/saturation/hue; returns 1 when value or chroma is zero (saturation and hue are then set to 0.0), 0 otherwise */ + MATFLOAT r = rgb[0]; + MATFLOAT g = rgb[1]; + MATFLOAT b = rgb[2]; + MATFLOAT val_min, val_max, delta; + + val_max = (g > b) ? g : b; + if (r > val_max) + val_max = r; + + val_min = (g < b) ? g : b; + if (r < val_min) + val_min = r; + + vsh[0] = val_max; + delta = val_max - val_min; + + if ((val_max != 0.0) && (delta != 0.0)) + vsh[1] = delta / val_max; + else { + vsh[2] = 0.0; + vsh[1] = 0.0; + return 1; + } + + if (r == val_max) + vsh[2] = (g - b) / delta; + else if (g == val_max) + vsh[2] = 2.0 + (b - r) / delta; + else + vsh[2] = 4.0 + (r - g) / delta; + + vsh[2] = vsh[2] * mat_get_pi() / 3.0; /* sector units (PI/3 each) to radians */ + vsh[2] = mat_norm_angle(vsh[2]); /* [0.0, 2PI) */ + + return 0; +} + +void cs_vsh_to_rgb(MATFLOAT vsh[3], MATFLOAT rgb[3]) +{ /* value/saturation/hue (hue in radians) back to RGB; zero or negative saturation yields r = g = b = value */ + MATFLOAT v = vsh[0]; + MATFLOAT s = vsh[1]; + + MATFLOAT r = v; + MATFLOAT g = v; + MATFLOAT b = v; + + if (s > 0.0) { + MATFLOAT h = 3.0 * vsh[2] / mat_get_pi(); /* hue in sector units (PI/3 each) */ + int ni = MAT_CLAMP((int)h, 0, 5); /* sector index limited to [0,5], so every value has a case below */ + MATFLOAT f = h - (MATFLOAT)ni; + MATFLOAT p = v * (1.0 - s); + MATFLOAT q = v * (1.0 - s * f); + MATFLOAT t = v * (1.0 - s * (1.0 - f)); + + switch (ni) { + case 0: + r = v; + g = t; + b = p; + break; + case 1: + r = q; + g = v; + b = p; + break; + case 2: + r = p; + g = v; + b = t; + break; + case 3: + r = p; + g = q; + b = v; + break; + case 4: + r = t; + g = p; + b = v; + break; + case 5: + r = v; + g = p; + b = q; + break; + } + } + + rgb[0] = r; + rgb[1] = g; + rgb[2] = b; +} + +/* YUV functions */ +void cs_yuv_to_ysh(MATFLOAT yuv_inp[3], MATFLOAT ysh_out[3]) +{ /* luma is copied; the chroma pair, offset by 0.5, is converted to radius and angle */ + ysh_out[0] = yuv_inp[0]; + ysh_out[1] = mat_radius(yuv_inp[2] - 0.5, yuv_inp[1] - 0.5); + ysh_out[2] = mat_angle(yuv_inp[2] - 0.5, yuv_inp[1] - 0.5); +} + +void cs_ysh_to_yuv(MATFLOAT ysh_inp[3], MATFLOAT yuv_out[3]) +{ /* radius and angle back to the chroma pair, with the 0.5 offset restored */ + yuv_out[0] = ysh_inp[0]; + yuv_out[1] = ysh_inp[1] * mat_cos(ysh_inp[2]) + 0.5; + yuv_out[2] = 
ysh_inp[1] * mat_sin(ysh_inp[2]) + 0.5; +} + +/* CIE LAB functions */ +void cs_rgb_to_lab(MATFLOAT rgb[3], MATFLOAT lab[3], struct s_color_space *ptr_color_space) +{ /* non-linear RGB -> linear RGB -> XYZ -> L*a*b*; the input rgb[] is left untouched (linearization goes to a local buffer) */ + MATFLOAT rgb_lin[3], xyz[3]; + + cs_gamma_rgb(rgb, rgb_lin, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN); + mat_eval_3x3(ptr_color_space->mat_rgb2xyz, rgb_lin, xyz); + cs_xyz_to_lab(xyz, lab, ptr_color_space->white_xyz); +} + +void cs_lab_to_rgb(MATFLOAT lab[3], MATFLOAT rgb[3], struct s_color_space *ptr_color_space) +{ /* L*a*b* -> XYZ -> linear RGB, limited to [0,1] by cs_clip_rgb(), then re-encoded with the color space gamma */ + MATFLOAT xyz[3]; + + cs_lab_to_xyz(lab, xyz, ptr_color_space->white_xyz); + mat_eval_3x3(ptr_color_space->mat_xyz2rgb, xyz, rgb); + cs_clip_rgb(rgb, 0.0, 1.0); + cs_gamma_rgb(rgb, rgb, ptr_color_space->gamma_parm, EGD_LIN_2_NONLIN); +} + +void cs_xyz_to_lab(MATFLOAT xyz[3], MATFLOAT lab[3], MATFLOAT white_xyz[3]) +{ /* each XYZ component is divided by the matching white_xyz component, so white_xyz[] must be non-zero */ + int nc; + MATFLOAT f[3], ft; + + for (nc = 0; nc < 3; nc++) { + ft = xyz[nc] / white_xyz[nc]; + f[nc] = (ft > CS_LAB_E) ? mat_pow(ft, 1.0 / 3.0) : (CS_LAB_K * ft + 16.0) / 116.0; /* cube root above CS_LAB_E, linear segment below */ + } + + lab[0] = 116.0f * f[1] - 16.0; + lab[1] = 500.0f * (f[0] - f[1]); + lab[2] = 200.0f * (f[1] - f[2]); +} + +void cs_lab_to_xyz(MATFLOAT lab[3], MATFLOAT xyz[3], MATFLOAT white_xyz[3]) +{ /* inverse of cs_xyz_to_lab(): per-component cube or linear segment, then scaling by white_xyz[] */ + int nc; + MATFLOAT f[3]; + MATFLOAT ft = (lab[0] + 16.0) / 116.0; + + f[0] = ft + lab[1] / 500.0; + f[1] = ft; + f[2] = ft - lab[2] / 200.0; + + xyz[0] = mat_pow(f[0], 3.0); + if (xyz[0] <= CS_LAB_E) + xyz[0] = (116.0 * f[0] - 16.0) / CS_LAB_K; + + if (lab[0] > CS_LAB_K * CS_LAB_E) + xyz[1] = mat_pow((lab[0] + 16.0) / 116.0, 3.0); + else + xyz[1] = lab[0] / CS_LAB_K; + + xyz[2] = mat_pow(f[2], 3.0); + if (xyz[2] <= CS_LAB_E) + xyz[2] = (116.0 * f[2] - 16.0) / CS_LAB_K; + + for (nc = 0; nc < 3; nc++) + xyz[nc] *= white_xyz[nc]; +} + +MATFLOAT cs_de94(MATFLOAT lab0[3], MATFLOAT lab1[3]) +{ /* color difference between two L*a*b* values; the chroma and hue weights are derived from lab0 only, so the result is not symmetric in its arguments */ + static const MATFLOAT Kc = 1.0; + static const MATFLOAT Kh = 1.0; + static const MATFLOAT Kl = 1.0; + static const MATFLOAT K1 = 0.045; + static const MATFLOAT K2 = 0.015; + + MATFLOAT dL = lab0[0] - lab1[0]; + MATFLOAT C1 = 
mat_sqrt(lab0[1] * lab0[1] + lab0[2] * lab0[2]); + MATFLOAT C2 = mat_sqrt(lab1[1] * lab1[1] + lab1[2] * lab1[2]); + MATFLOAT dC = C1 - C2; + + MATFLOAT da = lab0[1] - lab1[1]; + MATFLOAT db = lab0[2] - lab1[2]; + MATFLOAT tmp = da * da + db * db - dC * dC; + MATFLOAT dH = (tmp > 0) ? mat_sqrt(tmp) : 0.0; /* guard: a non-positive argument yields 0.0 instead of a square root */ + + MATFLOAT Sl = 1.0; + MATFLOAT Sc = 1.0 + K1 * C1; + MATFLOAT Sh = 1.0 + K2 * C1; + + dL /= (Kl * Sl); + dC /= (Kc * Sc); + dH /= (Kh * Sh); + + return mat_sqrt(dL * dL + dC * dC + dH * dH); +} + +/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */ +/* luminance_amb in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - ambient light in linear space */ +MATFLOAT cs_gamma_adjust(MATFLOAT gamma, MATFLOAT luminance_peak, MATFLOAT luminance_amb) +{ + /* gamma correction for peak luminance of the display */ + if (luminance_peak < 0.2) + gamma = gamma + ((luminance_peak > 0.1) ? 0.42 * mat_log10(luminance_peak / 0.1) : 0.0); /* no correction at or below 0.1 */ + else + gamma = gamma * mat_pow(1.111, mat_log2(luminance_peak / 0.1)); + /* gamma correction for ambient light */ + gamma = gamma - 0.076 * mat_log10(luminance_amb / 5.0); /* NOTE(review): divides by 5.0 although luminance_amb is documented above as normalized to [0,1] (5 cd/m^2 would be 0.0005, as in cs_ootf_gamma_amb) - confirm the expected unit with callers */ + + return gamma; +} + +/* BT.2100 */ +/* input must be in a range [0,1] normalized to [0,Lw]cd/m^2 in linear or non-linear space */ +/* output must be in a range [0,1] normalized to [0,Lw]cd/m^2 in linear or non-linear space */ +MATFLOAT cs_gamma_hlg(MATFLOAT val, enum cs_gamma_dir gamma_dir) +{ /* HLG curve in either direction: square-root/log pair for EGD_LIN_2_NONLIN, square/exp pair otherwise; the result is clamped to [0,1] */ + static const MATFLOAT s_a = 0.17883277; + static const MATFLOAT s_b = 0.28466892; + static const MATFLOAT s_c = 0.55991073; + + MATFLOAT val_out; + + if (gamma_dir == EGD_LIN_2_NONLIN) + val_out = (val <= (1.0 / 12.0)) ? mat_sqrt(3.0 * val) : s_a * mat_log(12.0 * val - s_b) + s_c; + else + val_out = (val <= 0.5) ? 
val * val / 3.0 : (mat_exp((val - s_c) / s_a) + s_b) / 12.0; + + return MAT_CLAMP(val_out, 0.0, 1.0); +} + +/* HLG OOTF */ +/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */ +/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */ +/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */ +/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +void cs_hlg_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma) +{ /* output may be the same as input */ + MATFLOAT ys = 0.2627 * rgb_inp[0] + 0.6780 * rgb_inp[1] + 0.0593 * rgb_inp[2]; /* weighted luminance of the input, computed before rgb_out is written */ + MATFLOAT scale = mat_pow(ys, system_gamma - 1.0); + int nc; + + for (nc = 0; nc < 3; nc++) { + rgb_out[nc] = rgb_inp[nc] * scale * luminance_peak; + rgb_out[nc] = MAT_CLAMP(rgb_out[nc], 0.0, 1.0); + } +} + +/* HLG OOTF_INV */ +/* rgb_inp[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */ +/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */ +/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */ +/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +void cs_hlg_ootf_inv(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma) +{ /* output may be the same as input */ + MATFLOAT yd = (0.2627 * rgb_inp[0] + 0.6780 * rgb_inp[1] + 0.0593 * rgb_inp[2]) / luminance_peak; /* luminance_peak must be non-zero */ + MATFLOAT scale = mat_pow(yd, (1.0 - system_gamma) / system_gamma) / luminance_peak; /* NOTE(review): the exponent is negative for system_gamma > 1, so a black input (yd == 0) depends on how mat_pow treats a zero base - confirm */ + int nc; + + for (nc = 0; nc < 3; nc++) { + rgb_out[nc] = rgb_inp[nc] * scale; + rgb_out[nc] = MAT_CLAMP(rgb_out[nc], 0.0, 1.0); + } +} + +/* HLG OETF */ +/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */ +/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in non-linear space */ +/* luminance_peak in a range 
[0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */ +/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +void cs_hlg_oetf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma) +{ /* output may be the same as input */ + int nc; + + cs_hlg_ootf_inv(rgb_inp, rgb_out, luminance_peak, system_gamma); /* inverse OOTF first, then the HLG curve per channel */ + for (nc = 0; nc < 3; nc++) + rgb_out[nc] = cs_gamma_hlg(rgb_out[nc], EGD_LIN_2_NONLIN); +} + +/* HLG EOTF */ +/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in non-linear space */ +/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */ +/* luminance_limits in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */ +/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +/* beta - user black level lift (= 0.0) */ +void cs_hlg_eotf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_limits[3], + MATFLOAT system_gamma, MATFLOAT beta) +{ /* output may be the same as input */ + int nc; + + for (nc = 0; nc < 3; nc++) { + rgb_out[nc] = MAT_MAX((1.0 - beta) * rgb_inp[nc] + beta, 0.0); /* black level lift, floored at 0.0 */ + rgb_out[nc] = cs_gamma_hlg(rgb_out[nc], EGD_NONLIN_2_LIN); + } + cs_hlg_ootf(rgb_out, rgb_out, luminance_limits[1], system_gamma); /* only luminance_limits[1] is used, as the peak */ +} + +/* HLG system gamma calculation */ +/* peak_luminance - Lw, expressed as a fraction of CS_MAX_LUMINANCE */ +MATFLOAT cs_hlg_system_gamma(MATFLOAT peak_luminance) +{ + MATFLOAT norm_peak = peak_luminance / (1000.0 / CS_MAX_LUMINANCE); /* peak relative to 1000 cd/m^2 */ + MATFLOAT system_gamma; + + if ((peak_luminance < 400.0 / CS_MAX_LUMINANCE) || (peak_luminance > 2000.0 / CS_MAX_LUMINANCE)) + system_gamma = 1.2 * mat_pow(1.111, mat_log2(norm_peak)); /* outside 400..2000 cd/m^2 */ + else + system_gamma = 1.2 + 0.42 * mat_log10(norm_peak); + + return system_gamma; +} + +#if 0 +/* PQ to HLG Transcode */ +/* rgb_inp[] in a range [0,1] normalized to [0,10000]cd/m^2 in non-linear space */ +/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in 
non-linear space */ +/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */ +/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +void cs_pq_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT gamma) +{ + MATFLOAT rgb_lin[3]; + int nc; + + for (nc = 0; nc < 3; nc++) + rgb_lin[nc] = cs_gamma_pq(rgb_inp[nc], EGD_NONLIN_2_LIN); /* PQ to Linear [0,10000]->[0,10000] */ + + cs_hlg_ootf_inv(rgb_lin, rgb_lin, luminance_peak, gamma); /* OOTF-1 - [0,10000]->[0,Lw] */ + cs_hlg_oetf(rgb_lin, rgb_out, luminance_peak, gamma); /* Linear to HLG - [0,Lw]->[0,Lw] */ /* NOTE(review): cs_hlg_oetf() itself calls cs_hlg_ootf_inv(), so the inverse OOTF is applied twice on this path */ +} + +/* HLG to PQ Transcode */ +/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in non-linear space */ +/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in non-linear space */ +/* vec_luminance in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */ +/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */ +void cs_hlg_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT vec_luminance[3], MATFLOAT gamma) +{ + MATFLOAT rgb_lin[3]; + int nc; + + cs_hlg_eotf(rgb_inp, rgb_lin, vec_luminance, gamma); /* HLG to Linear - [0,Lw]->[0,Lw] */ /* NOTE(review): cs_hlg_eotf() is defined with a fifth 'beta' argument and already applies cs_hlg_ootf() */ + cs_hlg_ootf(rgb_lin, rgb_lin, vec_luminance[1], gamma); /* OOTF - [0,Lw]->[0,10000] */ + + for (nc = 0; nc < 3; nc++) + rgb_out[nc] = cs_gamma_pq(rgb_lin[nc], EGD_LIN_2_NONLIN); /* Linear to PQ [0,10000]->[0,1000] */ +} + +/* BT.2390 display referred simplified */ +/* rgb_inp[] in a range [0,1] normalized to [0,100]cd/m^2 in non-linear space */ +/* rgb_out[] in a range [0,1] normalized to [0,1000]cd/m^2 in non-linear space */ +void cs_sdr_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020) +{ + MATFLOAT sdr_lb = 0.0; /* SDR black level */ + MATFLOAT sdr_lw = 100.0 / 10000.0; /* SDR white: 100 cd/m^2 as a fraction of 10000 */ + MATFLOAT sdr_gamma = 2.4; + MATFLOAT scale = 0.2546; /* 0.75HLG = 392cd/m^2 */ + MATFLOAT hlg_lw = 
1000.0 / 10000.0; /* HLG peak: 1000 cd/m^2 as a fraction of 10000 */ + MATFLOAT hlg_amb = 5.0 / 10000.0; /* ambient: 5 cd/m^2 as a fraction of 10000 */ + MATFLOAT hlg_gamma = cs_gamma_adjust(1.2, hlg_lw, hlg_amb); + MATFLOAT gamma = 1.03; /* applied below as the exponent 1.0 / gamma */ + MATFLOAT rgb_lin[3]; + int nc; + + for (nc = 0; nc < 3; nc++) { + rgb_lin[nc] = cs_gamma_1886(rgb_inp[nc], sdr_lb, sdr_lw, sdr_gamma); /* [0,10000]cd/m^2 */ + rgb_lin[nc] = rgb_lin[nc] / sdr_lw; /* [0,sdr_lw]cd/m^2 */ + rgb_lin[nc] = MAT_CLAMP(rgb_lin[nc], 0.0, 1.0); + } + + if (en_709_2020) { /* optional remap through the cs_mat_709_2020 matrix */ + MATFLOAT rgb_tmp[3]; + + mat_copy(rgb_lin, rgb_tmp, 3); + mat_eval_3x3(cs_mat_709_2020, rgb_tmp, rgb_lin); /* [0,sdr_lw]cd/m^2 */ + } + + for (nc = 0; nc < 3; nc++) { + rgb_lin[nc] = rgb_lin[nc] * scale; /* scale to 392cd/m^2 [0,hlg_lw] */ + rgb_lin[nc] = mat_pow(rgb_lin[nc], 1.0 / gamma); /* [0,hlg_lw] */ + } + + cs_hlg_oetf(rgb_lin, rgb_out, hlg_lw, hlg_gamma); /* Linear to HLG - [0,hlg_lw]cd/m^2->[0,hlg_lw]cd/m^2 */ +} +#endif /* disabled PQ/HLG/SDR transcode helpers */ diff --git a/src/amd/gmlib/gm/cs_funcs.h b/src/amd/gmlib/gm/cs_funcs.h new file mode 100755 index 00000000000..2565087d806 --- /dev/null +++ b/src/amd/gmlib/gm/cs_funcs.h @@ -0,0 +1,273 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : cs_funcs.h + * Purpose : Color Space functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : September 20, 2023 + * Version : 1.4 + *------------------------------------------------------------------------- + * + */ + +#pragma once + +#include "mat_funcs.h" + +#ifdef __cplusplus + extern "C" { +#endif + +#define CS_MAX_LUMINANCE 10000.0 /* cd/m^2 that normalized luminance 1.0 stands for */ +#define CS_SCALE_CCCS 125.0 +#define CS_CHAD_D65 0x01 /* apply chromatic adaptation */ + +static MATFLOAT cs_mat_709_2020[3][3] = { /* BT.2087 */ /* NOTE(review): non-const static defined in a header - every including translation unit gets its own copy */ + {0.6274, 0.3293, 0.0433}, + {0.0691, 0.9195, 0.0114}, + {0.0164, 0.0880, 0.8956} +}; + +enum cs_white_point_type { + EWPT_NONE = 0, /* NATIVE */ + EWPT_A = 1, + EWPT_B = 2, + EWPT_C = 3, + EWPT_D50 = 4, + EWPT_D55 = 5, + EWPT_D65 = 6, /* 709, sRGB, ADOBE, APPLE */ + EWPT_D75 = 7, + EWPT_9300 = 8, + EWPT_E = 9, + EWPT_F2 = 10, + EWPT_F7 = 11, + EWPT_F11 = 12, + EWPT_DCIP3 = 13, /* DCI-P3 */ + EWPT_11000 = 14, /* 11000K */ + EWPT_NUM = 15 /* CUSTOM */ +}; + +enum cs_gamma_type { + EGT_LINEAR = 0, /* LINEAR */ + EGT_709 = 1, /* 709 (SD/HD) */ + EGT_ADOBE = 2, /* ADOBE 1998 */ + EGT_DCIP3 = 3, /* DCI-P3 */ + EGT_APPLE = 4, /* APPLE */ + EGT_sRGB = 5, /* sRGB */ + EGT_PQ = 6, /* PQ */ + EGT_HLG = 7, /* HLG */ + EGT_2_2 = 8, /* 2.2 */ + EGT_2_4 = 9, /* 2.4 */ + EGT_CUSTOM = 10 /* CUSTOM */ +}; + +enum cs_color_space_type { + ECST_709 = 0, /* 709(HD),sRGB */ + ECST_SMPTE = 1, /* SMPTE RP125 (SD) */ + ECST_ADOBE = 2, /* ADOBE 1998 */ + ECST_DCIP3 = 3, /* DCI-P3 */ + ECST_APPLE = 4, /* APPLE */ + ECST_EBU = 5, /* EBU 3213 (576i) */ + ECST_NTSC = 6, /* NTSC 1953 */ + ECST_CIE = 7, /* CIE */ + ECST_BT2020 = 8, /* BT.2020 */ + ECST_CUSTOM = 9 /* CUSTOM */ +}; + +enum cs_gamma_dir { + EGD_NONLIN_2_LIN = 0, /* degamma */ + EGD_LIN_2_NONLIN = 1 /* regamma */ +}; + +struct s_cs_opts { + /* Color Space Type: [0,9]=0 : 0-709, 1-SMPTE, 2-ADOBE1998, 3-DCI-P3, 4-APPLE, + 5-EBU3213, 6-NTSC, 
7-CIE, 8-BT2020, 9-CUSTOM */ + enum cs_color_space_type color_space_type; + /* Gamma Type: [0,10]=1 : 0-LINEAR, 1-709, 2-ADOBE, 3-DCI-P3, 4-APPLE, + 5-sRGB, 6-PQ, 7-HLG, 8-G2.2, 9-G2.4, 10-CUSTOM */ + enum cs_gamma_type gamma_type; + MATFLOAT luminance_limits[2]; /* luminance min/max in a range [0.0,10000.0]= {0.0,400.0} */ + MATFLOAT pq_norm; /* normalization luminance for PQ: [0.0,10000.0] = 0.0 - no normalization */ + unsigned int mode; /* mode: {0,1}=0 : Enable/disable Chromatic adaptation */ + MATFLOAT rgbw_xy[8]; /* Chromaticity: Red, Green, Blue, White in xy */ + MATFLOAT gamma_parm[4]; /* Gamma parameters: (0.0,?,?,?) - PQ, (0.5,?,?,?) - HLG */ +}; + +struct s_color_space { + /* input parameters */ + /* cs_color_space_type: [0,9]=9 : 0-709, 1-SMPTE, 2-ADOBE1998, 3-DCI-P3, 4-APPLE, + 5-EBU3213, 6-NTSC, 7-CIE, 8-BT2020, 9-CUSTOM */ + enum cs_color_space_type color_space_type; + /* cs_gamma_type: [0,10] : 0-LINEAR, 1-709, 2-ADOBE, 3-DCI-P3, 4-APPLE, + 5-sRGB, 6-PQ, 7-HLG, 8-Gamma2.2, 9-Gamma2.4, 10-CUSTOM (default: CUSTOM) */ + enum cs_gamma_type gamma_type; + /* luminances min/max/range normalized to 10000.0 in a range [0.0,1.0]=0.0,1.0,1.0 */ + MATFLOAT luminance_limits[3]; + MATFLOAT pq_norm; /* normalization luminance for PQ: [0.0,10000.0] = 0.0 - no normalization */ + unsigned int mode; /* mode: {0,1}=0 : CS_CHAD_D65 - Enable Chromatic Adaptation */ + /* custom or initialized parameters based on input parameters */ + MATFLOAT rgbw_xy[8]; /* Red, Green, Blue, White in xy */ + MATFLOAT gamma_parm[4]; /* Gamma parameters: 0.0,?,?,? - PQ, 0.5,?,?,? 
- HLG */ + /* calculated variables */ + MATFLOAT luma_limits[3]; /* Min/max/range luma (PQ) normalized to 10000 : [0.0,1.0]=0,1,1 */ + MATFLOAT mat_rgb2xyz[3][3]; /* RGB to XYZ matrix */ + MATFLOAT mat_xyz2rgb[3][3]; /* XYZ to RGB matrix */ + MATFLOAT mat_rgb2lms[3][3]; /* RGB to LMS matrix */ + MATFLOAT mat_lms2rgb[3][3]; /* LMS to RGB matrix */ + MATFLOAT mat_lms2itp[3][3]; /* LMS to ITP matrix */ + MATFLOAT mat_itp2lms[3][3]; /* ITP to LMS matrix */ + MATFLOAT mat_chad[3][3]; /* Chromatic Adaptation matrix */ + MATFLOAT white_xyz[3]; /* White in XYZ */ + int cct; /* Correlated Color Temperature */ + MATFLOAT hlg_system_gamma; /* HLG OOTF system gamma */ + MATFLOAT hlg_beta; /* user black level lift */ +}; + +/* get internal constants */ +const MATFLOAT *cs_get_gamma(enum cs_gamma_type gamma_type); +const MATFLOAT *cs_get_color_space(enum cs_color_space_type color_space_type); +const MATFLOAT *cs_get_white_point(enum cs_white_point_type white_point_type); + +/* initialize color space functions */ +void cs_set_opts_def(struct s_cs_opts *ptr_cs_opts); +void cs_init(struct s_cs_opts *ptr_cs_opts, struct s_color_space *ptr_color_space); +void cs_init_private(struct s_color_space *ptr_color_space); +void cs_copy(struct s_color_space *ptr_color_space_src, struct s_color_space *ptr_color_space_dst); +void cs_luminance_to_luma_limits(MATFLOAT luminance_limits[2], MATFLOAT luma_limits[3]); + +/* color formats conversion functions */ +void cs_xyy_to_xyz(MATFLOAT xyy_inp[3], MATFLOAT xyz_out[3]); +void cs_xyz_to_xyy(MATFLOAT xyz_inp[3], MATFLOAT xyy_out[3]); + +void cs_xyzc_to_xyz(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3]); +void cs_xyz_to_xyzc(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3]); + +void cs_rgb_to_itp(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3]); +void cs_itp_to_rgb(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3]); + +void cs_ich_to_itp(MATFLOAT ich_inp[3], MATFLOAT itp_out[3]); +void 
cs_itp_to_ich(MATFLOAT itp_inp[3], MATFLOAT ich_out[3]); + +void cs_rgb_to_yuv(MATFLOAT rgb_inp[3], MATFLOAT yuv_out[3]); +void cs_yuv_to_rgb(MATFLOAT yuv_inp[3], MATFLOAT rgb_out[3]); + +MATFLOAT cs_nlin_to_lin(struct s_color_space *ptr_color_space, MATFLOAT val_inp); +void cs_nlin_to_lin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]); + +MATFLOAT cs_lin_to_nlin(struct s_color_space *ptr_color_space, MATFLOAT val_inp); +void cs_lin_to_nlin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]); + +/* internal matrices generation functions */ +int cs_genmat_rgb_to_xyz(MATFLOAT rgbw_xy[8], MATFLOAT mat_rgb2xyz[3][3]); +int cs_genmat_xyz_to_rgb(MATFLOAT rgbw_xy[8], MATFLOAT mat_xyz2rgb[3][3]); +int cs_genmat_rgb_to_rgb(MATFLOAT rgbw_xy_src[8], MATFLOAT rgbw_xy_dst[8], MATFLOAT mat_rgb2rgb[3][3], int en_chad); +int cs_genmat_chad(MATFLOAT white_xy_src[2], MATFLOAT white_xy_dst[2], MATFLOAT mat_chad[3][3]); + +/* gamma curves generation functions */ +MATFLOAT cs_gamma(MATFLOAT val, MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir); +MATFLOAT cs_gamma_pq(MATFLOAT val, enum cs_gamma_dir gamma_dir); +MATFLOAT cs_gamma_1886(MATFLOAT val, MATFLOAT lb, MATFLOAT lw, MATFLOAT gamma); + +void cs_pq_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]); + +void cs_sdr_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020); + +void cs_gamma_rgb(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir); + +/* signal clipping functions */ +int cs_min_rgb(MATFLOAT rgb[3], MATFLOAT val_min); +int cs_max_rgb(MATFLOAT rgb[3], MATFLOAT val_max); + +/* signal validation functions */ +int cs_is_valid_itp(struct s_color_space *ptr_color_space, MATFLOAT itp[3]); +int cs_is_valid_ic(struct s_color_space *ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2]); +int cs_is_valid_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max); +int cs_clip_rgb(MATFLOAT rgb[3], 
MATFLOAT val_min, MATFLOAT val_max); +void cs_clamp_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max); + +/* signal normalization functions */ +void cs_norm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng); +void cs_denorm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng); + +/* signal format conversion functions */ +void cs_int2flt_rgb(int rgb_inp[3], MATFLOAT rgb_out[3], int val_max); +void cs_flt2int_rgb(MATFLOAT rgb_inp[3], int rgb_out[3], int val_max); +void cs_short2flt_rgb(unsigned short rgb_inp[3], MATFLOAT rgb_out[3], int val_max); +void cs_flt2short_rgb(MATFLOAT rgb_inp[3], unsigned short rgb_out[3], int val_max); + +void cs_genprim_itp(struct s_color_space *ptr_color_space, + int num_prim, MATFLOAT *ptr_prim_rgb, MATFLOAT *ptr_prim_ich); + +/* gamma curve handling functions */ +MATFLOAT cs_soft_clip(MATFLOAT val, MATFLOAT limits_src[3], MATFLOAT limits_dst[3]); +MATFLOAT cs_gamma_to_gamma(MATFLOAT val, enum cs_gamma_type gamma_type_src, enum cs_gamma_type gamma_type_dst, + MATFLOAT luminance_limits_dst[3], MATFLOAT luma_limits_src[3], MATFLOAT luma_limits_dst[3], + MATFLOAT(*func_pq_to_pq)(MATFLOAT), int en_norm, int en_soft_clip); + +/* CCT handling functions */ +#define CS_CCT_MIN 1000 /* lowest CCT handled by cs_cct_to_xy(); smaller inputs are clamped */ +#define CS_CCT_MAX 20000 /* highest CCT handled by cs_cct_to_xy(); larger inputs are clamped */ +#define CS_CCT_INC 100 /* CCT step between consecutive lookup entries */ +#define CS_CCT_SIZE ((CS_CCT_MAX - CS_CCT_MIN) / CS_CCT_INC + 1) /* number of lookup entries */ + +int cs_xy_to_cct(MATFLOAT white_xy[2]); +void cs_cct_to_xy(int cct, MATFLOAT xy[2]); +void cs_csc(struct s_color_space *ptr_cs_src, struct s_color_space *ptr_cs_dst, + MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], int en_chad); +int cs_is_space(struct s_color_space *ptr_color_space, + enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type); + +void cs_init_type(MATFLOAT luminance_limits[2], + enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type, + struct s_color_space *ptr_color_space); +void cs_init_BT709(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space); +void 
cs_init_BT2100(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space); +void cs_rgb_to_ycbcr2020(MATFLOAT rgb_inp[3], MATFLOAT ycbcr_out[3]); + +MATFLOAT cs_ootf_gamma_peak(MATFLOAT gamma, MATFLOAT luminance_peak); +MATFLOAT cs_ootf_gamma_amb(MATFLOAT gamma, MATFLOAT luminance_ambient); +MATFLOAT cs_gamma_adjust_sdr(MATFLOAT gamma, MATFLOAT luminance_peak); +MATFLOAT cs_gamma_adjust(MATFLOAT gamma, MATFLOAT luminance_peak, MATFLOAT luminance_amb); + +void cs_chad_gains(MATFLOAT rgbw_xy[8], MATFLOAT w_xy[2], MATFLOAT rgb_gain[3]); +void cs_genmat_cct(struct s_color_space *ptr_cs, int cct_shift, int norm, MATFLOAT mat_cct[3][3]); + +/* HSV functions */ +int cs_rgb_to_vsh(MATFLOAT rgb[3], MATFLOAT vsh[3]); +void cs_vsh_to_rgb(MATFLOAT vsh[3], MATFLOAT rgb[3]); + +/* YUV functions */ +void cs_yuv_to_ysh(MATFLOAT yuv_inp[3], MATFLOAT ysh_out[3]); +void cs_ysh_to_yuv(MATFLOAT ysh_inp[3], MATFLOAT yuv_out[3]); + +/* CIELAB functions */ +#define CS_LAB_E 0.008856 /* threshold between the cube-root and the linear segment of the L*a*b* transfer function */ +#define CS_LAB_K 903.3 /* slope constant of the linear segment */ + +void cs_rgb_to_lab(MATFLOAT rgb[3], MATFLOAT lab[3], struct s_color_space *ptr_color_space); +void cs_lab_to_rgb(MATFLOAT lab[3], MATFLOAT rgb[3], struct s_color_space *ptr_color_space); +void cs_xyz_to_lab(MATFLOAT xyz[3], MATFLOAT lab[3], MATFLOAT white_xyz[3]); +void cs_lab_to_xyz(MATFLOAT lab[3], MATFLOAT xyz[3], MATFLOAT white_xyz[3]); +MATFLOAT cs_de94(MATFLOAT lab0[3], MATFLOAT lab1[3]); + +/* HLG functions */ +MATFLOAT cs_gamma_hlg(MATFLOAT val, enum cs_gamma_dir gamma_dir); +void cs_hlg_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma); +void cs_hlg_ootf_inv(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT gamma); +void cs_hlg_oetf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma); +void cs_hlg_eotf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_limits[3], + MATFLOAT system_gamma, MATFLOAT beta); +MATFLOAT cs_hlg_system_gamma(MATFLOAT 
peak_luminance); + +#if 0 +void cs_pq_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma); +void cs_hlg_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT vec_luminance[3], + MATFLOAT system_gamma, MATFLOAT beta); +void cs_sdr_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020); +#endif + +#ifdef __cplusplus +} +#endif diff --git a/src/amd/gmlib/gm/csc_api_funcs.c b/src/amd/gmlib/gm/csc_api_funcs.c new file mode 100755 index 00000000000..e76a6574620 --- /dev/null +++ b/src/amd/gmlib/gm/csc_api_funcs.c @@ -0,0 +1,75 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : csc_api_funcs.c + * Purpose : Color Space Conversion 3DLUT functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : June 09, 2023 + * Version : 1.2 + *---------------------------------------------------------------------- + * + */ + +#ifndef GM_SIM +#pragma code_seg("PAGED3PC") +#pragma data_seg("PAGED3PD") +#pragma const_seg("PAGED3PR") +#endif + +#include "csc_api_funcs.h" + +void csc_api_set_def(struct s_csc_api_opts *ptr_csc_api_opts) +{ /* Resets both color space option sets to their defaults, disables chromatic adaptation and selects a 17-point, 12-bit 3DLUT with no buffer attached */ + cs_set_opts_def(&ptr_csc_api_opts->cs_opts_src); + cs_set_opts_def(&ptr_csc_api_opts->cs_opts_dst); + ptr_csc_api_opts->en_chad = 0; + + /* 3DLUT */ + ptr_csc_api_opts->en_merge_3dlut = 0; + ptr_csc_api_opts->num_pnts_3dlut = 17; + ptr_csc_api_opts->bitwidth_3dlut = 12; + ptr_csc_api_opts->ptr_3dlut_rgb = 0; /* the caller must attach a buffer before csc_api_gen_3dlut() */ +} + +int csc_api_gen_map(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map) +{ /* Initializes the source and destination color spaces of the map from the options, copies en_chad and returns the result of csc_init_map() */ + cs_init(&ptr_csc_api_opts->cs_opts_src, &ptr_csc_map->color_space_src); + cs_init(&ptr_csc_api_opts->cs_opts_dst, &ptr_csc_map->color_space_dst); + + ptr_csc_map->en_chad = ptr_csc_api_opts->en_chad; + + return csc_init_map(ptr_csc_map); +} + +int csc_api_gen_3dlut(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map) +{ + int index 
= 0; /* running offset into ptr_3dlut_rgb, three values per grid point */ + int value_max = (1 << ptr_csc_api_opts->bitwidth_3dlut) - 1; /* largest code value for the configured bit width */ + int nir, nig, nib; + + if ((ptr_csc_api_opts->ptr_3dlut_rgb == 0) || (!ptr_csc_api_opts->en_merge_3dlut && (ptr_csc_api_opts->num_pnts_3dlut < 2))) + return -1; /* something wrong: no LUT buffer, or an identity grid with fewer than 2 points (it is divided by num_pnts_3dlut - 1 below) */ + + for (nir = 0; nir < ptr_csc_api_opts->num_pnts_3dlut; nir++) + for (nig = 0; nig < ptr_csc_api_opts->num_pnts_3dlut; nig++) + for (nib = 0; nib < ptr_csc_api_opts->num_pnts_3dlut; nib++) { + unsigned short rgb[3]; + MATFLOAT rgb_inp[3], rgb_out[3]; + + /* merge mode reads the existing LUT entry as input; otherwise the input is the identity grid value */ + rgb[0] = ptr_csc_api_opts->en_merge_3dlut ? ptr_csc_api_opts->ptr_3dlut_rgb[index + 0] : + (nir * value_max) / (ptr_csc_api_opts->num_pnts_3dlut - 1); + rgb[1] = ptr_csc_api_opts->en_merge_3dlut ? ptr_csc_api_opts->ptr_3dlut_rgb[index + 1] : + (nig * value_max) / (ptr_csc_api_opts->num_pnts_3dlut - 1); + rgb[2] = ptr_csc_api_opts->en_merge_3dlut ? ptr_csc_api_opts->ptr_3dlut_rgb[index + 2] : + (nib * value_max) / (ptr_csc_api_opts->num_pnts_3dlut - 1); + + cs_short2flt_rgb(rgb, rgb_inp, value_max); + csc_rgb_to_rgb(ptr_csc_map, rgb_inp, rgb_out); + cs_flt2short_rgb(rgb_out, &ptr_csc_api_opts->ptr_3dlut_rgb[index], value_max); /* result overwrites the entry in place */ + index += 3; + } + + return 0; +} diff --git a/src/amd/gmlib/gm/csc_api_funcs.h b/src/amd/gmlib/gm/csc_api_funcs.h new file mode 100755 index 00000000000..522b16b0477 --- /dev/null +++ b/src/amd/gmlib/gm/csc_api_funcs.h @@ -0,0 +1,41 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
#pragma once

#include "csc_funcs.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Options consumed by the Color Space Conversion API.
 * Fill with csc_api_set_def(), adjust as needed, then pass to
 * csc_api_gen_map() and csc_api_gen_3dlut().
 */
struct s_csc_api_opts {	/* csc parameters */
	int					en_chad;		/* enable/disable chromatic adaptation: {0,1}=0 */
	struct s_cs_opts	cs_opts_src;	/* Source color space */
	struct s_cs_opts	cs_opts_dst;	/* Destination color space */
	/* 3DLUT parameters */
	int					en_merge_3dlut;	/* 0 - inputs are the identity grid, else inputs are read from the existing LUT; default 0 */
	int					num_pnts_3dlut;	/* number of grid points per axis; default 17 */
	int					bitwidth_3dlut;	/* bits per component, code range is (1 << bitwidth) - 1; default 12 */
	unsigned short		*ptr_3dlut_rgb;	/* caller-owned LUT buffer, 3 values per grid node; default 0 (none) */
};

/* Reset all options to their defaults (see field comments above). */
void csc_api_set_def(struct s_csc_api_opts *ptr_csc_api_opts);

/* Initialize the conversion map from the options; returns csc_init_map() result. */
int csc_api_gen_map(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map);
/* Convert every LUT node through the map; returns -1 when no LUT buffer is attached. */
int csc_api_gen_3dlut(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map);

#ifdef __cplusplus
}
#endif
+ * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : csc_funcs.c + * Purpose : Color Space Conversion 3DLUT functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : June 09, 2023 + * Version : 1.2 + *---------------------------------------------------------------------- + * + */ + +#ifndef GM_SIM +#pragma code_seg("PAGED3PC") +#pragma data_seg("PAGED3PD") +#pragma const_seg("PAGED3PR") +#endif + +#include "csc_funcs.h" + +void csc_ctor(struct s_csc_map *ptr_csc_map) +{ + csc_set_def(ptr_csc_map); +} + +void csc_dtor(struct s_csc_map *ptr_csc_map) +{ +} + +void csc_set_def(struct s_csc_map *ptr_csc_map) +{ + ptr_csc_map->en_chad = 0; + mat_3x3_unity(ptr_csc_map->mat_csc); +} + +int csc_init_map(struct s_csc_map *ptr_csc_map) +{ + cs_genmat_rgb_to_rgb(ptr_csc_map->color_space_src.rgbw_xy, ptr_csc_map->color_space_dst.rgbw_xy, + ptr_csc_map->mat_csc, ptr_csc_map->en_chad); + + return 0; +} + +int csc_rgb_to_rgb(struct s_csc_map *ptr_csc_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]) +{ + MATFLOAT rgb_tmp[3]; + + cs_nlin_to_lin_rgb(&ptr_csc_map->color_space_src, rgb_inp, rgb_tmp); + mat_eval_3x3(ptr_csc_map->mat_csc, rgb_tmp, rgb_out); + cs_clamp_rgb(rgb_out, 0.0, 1.0); + cs_lin_to_nlin_rgb(&ptr_csc_map->color_space_dst, rgb_out, rgb_out); + + return 0; +} diff --git a/src/amd/gmlib/gm/csc_funcs.h b/src/amd/gmlib/gm/csc_funcs.h new file mode 100755 index 00000000000..0ea4e4b2a65 --- /dev/null +++ b/src/amd/gmlib/gm/csc_funcs.h @@ -0,0 +1,41 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
#pragma once

#include "cs_funcs.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * State of one color space conversion: the two color spaces, the
 * chromatic-adaptation switch and the matrix derived from them.
 */
struct s_csc_map {
	int						en_chad;			/* enable/disable chromatic adaptation: {0,1}=0 */
	struct s_color_space	color_space_src;	/* Source color space */
	struct s_color_space	color_space_dst;	/* Destination color space */
	MATFLOAT				mat_csc[3][3];		/* color space conversion matrix */
};

/* constructor and destructor */
void csc_ctor(struct s_csc_map *ptr_csc_map);	/* applies csc_set_def() */
void csc_dtor(struct s_csc_map *ptr_csc_map);	/* no-op: nothing is owned by the map */

/* en_chad = 0 and unity conversion matrix */
void csc_set_def(struct s_csc_map *ptr_csc_map);
/* computes mat_csc from the RGBW chromaticities of both color spaces; returns 0 */
int csc_init_map(struct s_csc_map *ptr_csc_map);

/* converts one RGB triple (non-linear in, non-linear out, clamped to [0,1] in linear light); returns 0 */
int csc_rgb_to_rgb(struct s_csc_map *ptr_csc_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);

#ifdef __cplusplus
}
#endif
+ * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : cvd_api_funcs.c + * Purpose : Color Vision Deficiency functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : January 21, 2020 + * Version : 1.0 + *---------------------------------------------------------------------- + * + */ + +#ifndef GM_SIM +#pragma code_seg("PAGED3PC") +#pragma data_seg("PAGED3PD") +#pragma const_seg("PAGED3PR") +#endif + +#include "cvd_api_funcs.h" + +void cvd_api_set_def(struct s_cvd_api_opts *ptr_api_cvd_opts) +{ + int nk; + + ptr_api_cvd_opts->mode = ECM_NONE; + for (nk = 0; nk < 3; nk++) + ptr_api_cvd_opts->gain[nk] = 0.0; + + cs_set_opts_def(&ptr_api_cvd_opts->cs_opts); + + ptr_api_cvd_opts->en_merge_3dlut = 0; + ptr_api_cvd_opts->num_pnts_3dlut = 17; + ptr_api_cvd_opts->bitwidth_3dlut = 12; + ptr_api_cvd_opts->ptr_3dlut_rgb = 0; +} + +int cvd_api_gen_map(struct s_cvd_api_opts *ptr_api_cvd_opts, struct s_cvd_map *ptr_cvd_map) +{ + int nk; + + cvd_set_def(ptr_cvd_map); + + ptr_cvd_map->mode = ptr_api_cvd_opts->mode; + for (nk = 0; nk < 3; nk++) + ptr_cvd_map->gain[nk] = ptr_api_cvd_opts->gain[nk]; + + cs_init(&ptr_api_cvd_opts->cs_opts, &ptr_cvd_map->color_space); + + return 0; +} + +int cvd_api_gen_3dlut(struct s_cvd_api_opts *ptr_api_cvd_opts, struct s_cvd_map *ptr_cvd_map) +{ + int index = 0; + int nir, nig, nib; + int value_max; + + if (ptr_api_cvd_opts->ptr_3dlut_rgb == 0) + return -1; /* something wrong */ + + value_max = (1 << ptr_api_cvd_opts->bitwidth_3dlut) - 1; + for (nir = 0; nir < ptr_api_cvd_opts->num_pnts_3dlut; nir++) + for (nig = 0; nig < ptr_api_cvd_opts->num_pnts_3dlut; nig++) + for (nib = 0; nib < ptr_api_cvd_opts->num_pnts_3dlut; nib++) { + unsigned short rgb[3]; + MATFLOAT rgb_inp[3], rgb_out[3]; + + rgb[0] = ptr_api_cvd_opts->en_merge_3dlut ? 
ptr_api_cvd_opts->ptr_3dlut_rgb[index + 0] : + (nir * value_max) / (ptr_api_cvd_opts->num_pnts_3dlut - 1); + rgb[1] = ptr_api_cvd_opts->en_merge_3dlut ? ptr_api_cvd_opts->ptr_3dlut_rgb[index + 1] : + (nig * value_max) / (ptr_api_cvd_opts->num_pnts_3dlut - 1); + rgb[2] = ptr_api_cvd_opts->en_merge_3dlut ? ptr_api_cvd_opts->ptr_3dlut_rgb[index + 2] : + (nib * value_max) / (ptr_api_cvd_opts->num_pnts_3dlut - 1); + + cs_short2flt_rgb(rgb, rgb_inp, value_max); + cvd_rgb_to_rgb(ptr_cvd_map, rgb_inp, rgb_out); + cs_flt2short_rgb(rgb_out, &ptr_api_cvd_opts->ptr_3dlut_rgb[index], value_max); + + index += 3; + } + + return 0; +} diff --git a/src/amd/gmlib/gm/cvd_api_funcs.h b/src/amd/gmlib/gm/cvd_api_funcs.h new file mode 100755 index 00000000000..16692a470c4 --- /dev/null +++ b/src/amd/gmlib/gm/cvd_api_funcs.h @@ -0,0 +1,42 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : cvd_api_funcs.h + * Purpose : Color Vision Deficiency functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : January 21, 2020 + * Version : 1.0 + *---------------------------------------------------------------------- + * + */ + +#pragma once + +#include "cvd_funcs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct s_cvd_api_opts { + /* cvd parameters */ + enum cvd_mode mode; /* CVD mode: 0 - NONE, 1 - 3 sliders, 2 - 1 slider*/ + MATFLOAT gain[3]; /* Compensation Gain: ([0] - Protanopia, [1] - Deuteranopia, [2] - Tritanopia: [0.0,2.0]=0.0 */ + struct s_cs_opts cs_opts; /* Color Space parameters */ + /* 3DLUT parameters */ + int en_merge_3dlut; + int num_pnts_3dlut; + int bitwidth_3dlut; + unsigned short *ptr_3dlut_rgb; +}; + +void cvd_api_set_def(struct s_cvd_api_opts *ptr_api_cvd_opts); + +int cvd_api_gen_map(struct s_cvd_api_opts *ptr_api_cvd_opts, struct s_cvd_map *ptr_cvd_map); +int cvd_api_gen_3dlut(struct s_cvd_api_opts *ptr_api_cvd_opts, struct 
s_cvd_map *ptr_cvd_map); + +#ifdef __cplusplus +} +#endif diff --git a/src/amd/gmlib/gm/cvd_funcs.c b/src/amd/gmlib/gm/cvd_funcs.c new file mode 100755 index 00000000000..a3878a466de --- /dev/null +++ b/src/amd/gmlib/gm/cvd_funcs.c @@ -0,0 +1,132 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : cvd_funcs.c + * Purpose : Color Vision Deficiency functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : January 21, 2020 + * Version : 1.0 + *---------------------------------------------------------------------- + * + */ + +#ifndef GM_SIM +#pragma code_seg("PAGED3PC") +#pragma data_seg("PAGED3PD") +#pragma const_seg("PAGED3PR") +#endif + +#include "cvd_funcs.h" + +void cvd_ctor(struct s_cvd_map *ptr_cvd_map) +{ + cvd_set_def(ptr_cvd_map); +} + +void cvd_dtor(struct s_cvd_map *ptr_cvd_map) +{ + cvd_set_def(ptr_cvd_map); +} + +void cvd_set_def(struct s_cvd_map *ptr_cvd_map) +{ + int nk; + + ptr_cvd_map->mode = ECM_NONE; + + for (nk = 0; nk < 3; nk++) + ptr_cvd_map->gain[nk] = 0.0; + +} + +int cvd_rgb_to_rgb(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]) +{ + int rc = 0; + + if (ptr_cvd_map->mode != ECM_NONE) + rc = cvd_rgb_to_rgb_dalton(ptr_cvd_map, rgb_inp, rgb_out); + else + mat_copy(rgb_inp, rgb_out, 3); + + return rc; +} + +void cvd_model_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], + enum cvd_type type) +{ + static MATFLOAT cvd_mat_rgb2lms[3][3] = { + {17.8824, 43.5161, 4.11935}, + {3.45565, 27.1554, 3.86714}, + {0.0299566, 0.184309, 1.46709} + }; + static MATFLOAT cvd_mat_lms2rgb[3][3] = { + { 0.080944, -0.130504, 0.116721}, + {-0.0102485, 0.0540194, -0.113615}, + {-0.000365294, -0.00412163, 0.693513} + }; + static MATFLOAT cvd_mat_model[ECVDT_NUM][3][3] = { + {/* protanopia */ {0.0, 2.02324, -2.52581}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}}, + {/* deuteranopia */ 
{1.0, 0.0, 0.0}, {0.494207, 0.0, 1.24827}, {0.0, 0.0, 1.0}}, +// {/* tritanopia */ {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {-0.395913, 0.801109, 0.0}} + {/* tritanopia */ {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {-0.012245, 0.0720345, 0.0}} + }; + + MATFLOAT lms_inp[3], lms_out[3]; + + mat_eval_3x3(cvd_mat_rgb2lms, rgb_inp, lms_inp); + mat_eval_3x3(cvd_mat_model[type], lms_inp, lms_out); + mat_eval_3x3(cvd_mat_lms2rgb, lms_out, rgb_out); + cs_clamp_rgb(rgb_out, 0.0, 1.0); +} + +int cvd_rgb_to_rgb_dalton(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]) +{ + static MATFLOAT cvd_mat_err[ECVDT_NUM][3][3] = { + {/* protanopia */ {-0.5, 0.0, 0.0}, {1.0, 1.0, 0.0}, {1.0, 0.0, 1.0}}, + {/* deuteranopia */ { 1.0, 1.0, 0.0}, {0.0, -0.5, 0.0}, {0.0, 1.0, 1.0}}, + {/* tritanopia */ { 1.0, 0.0, 1.0}, {0.0, 1.0, 1.0}, {0.0, 0.0, -0.5}} + }; + + MATFLOAT rgb_inp_lin[3], rgb_out_lin[3]; + MATFLOAT rgb_err_map[ECVDT_NUM][3]; + MATFLOAT err_map; + MATFLOAT gain; + int nc, nk; + + cs_gamma_rgb(rgb_inp, rgb_inp_lin, ptr_cvd_map->color_space.gamma_parm, EGD_NONLIN_2_LIN); + mat_copy(rgb_inp_lin, rgb_out_lin, 3); + + for (nk = 0; nk < 3; nk++) { + MATFLOAT rgb_cvd[3], rgb_err[3]; + + cvd_model_rgb(&ptr_cvd_map->color_space, rgb_inp_lin, rgb_cvd, nk); + for (nc = 0; nc < 3; nc++) + rgb_err[nc] = rgb_inp_lin[nc] - rgb_cvd[nc]; + mat_eval_3x3(cvd_mat_err[nk], rgb_err, rgb_err_map[nk]); + } + + if (ptr_cvd_map->mode == ECM_DALTON_SLD3) { /* ECM_DALTON_SLD3 */ + for (nk = 0; nk < 3; nk++) { + gain = ptr_cvd_map->gain[nk] * 0.5; + for (nc = 0; nc < 3; nc++) + rgb_out_lin[nc] += rgb_err_map[nk][nc] * gain; + } + } else { /* ECM_DALTON_SLD1 */ + for (nc = 0; nc < 3; nc++) { + if (ptr_cvd_map->gain[0] <= 1.0) + err_map = ptr_cvd_map->gain[0] * rgb_err_map[0][nc]; + else if (ptr_cvd_map->gain[0] <= 2.0) + err_map = rgb_err_map[0][nc] + (ptr_cvd_map->gain[0] - 1.0) * (rgb_err_map[1][nc] - rgb_err_map[0][nc]); + else + err_map = rgb_err_map[1][nc] + (ptr_cvd_map->gain[0] - 2.0) * 
(rgb_err_map[2][nc] - rgb_err_map[1][nc]); + rgb_out_lin[nc] += err_map; + } + } + + cs_clamp_rgb(rgb_out_lin, 0.0, 1.0); + cs_gamma_rgb(rgb_out_lin, rgb_out, ptr_cvd_map->color_space.gamma_parm, EGD_LIN_2_NONLIN); + + return 0; +} diff --git a/src/amd/gmlib/gm/cvd_funcs.h b/src/amd/gmlib/gm/cvd_funcs.h new file mode 100755 index 00000000000..ea263746de9 --- /dev/null +++ b/src/amd/gmlib/gm/cvd_funcs.h @@ -0,0 +1,57 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : cvd_funcs.h + * Purpose : Color Vision Deficiency functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : January 21, 2020 + * Version : 1.0 + *---------------------------------------------------------------------- + * + */ + +#pragma once + +#include "cs_funcs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum cvd_mode { + ECM_NONE = 0, /* NONE */ + ECM_DALTON_SLD3 = 1, /* DALTONIZATION 3 control sliders */ + ECM_DALTON_SLD1 = 2, /* DALTONIZATION 1 control slider */ + ECM_NUM = 3 +}; + +enum cvd_type { + ECVDT_PROTANOPIA = 0, /* protanopia */ + ECVDT_DEUTERANOPIA = 1, /* deuteranopia */ + ECVDT_TRITANOPIA = 2, /* tritanopia */ + ECVDT_NUM = 3 +}; + +struct s_cvd_map { + /* input parameters */ + enum cvd_mode mode; /* Enable/disable CVD: {0,1,2}=0 */ + MATFLOAT gain[3]; /* Compensation Gain: ([0] - Protanopia, [1] - Deuteranopia, [2] - Tritanopia: [0.0,2.0]=0.0 */ + struct s_color_space color_space; /* Color Space (primary RGBW chromaticity, gamma, and Luminance min/max) */ +}; + +/* constructor and destructor */ +void cvd_ctor(struct s_cvd_map *ptr_cvd_map); +void cvd_dtor(struct s_cvd_map *ptr_cvd_map); + +void cvd_set_def(struct s_cvd_map *ptr_cvd_map); + +int cvd_rgb_to_rgb(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]); +void cvd_model_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], + enum 
cvd_type type); +int cvd_rgb_to_rgb_dalton(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]); + +#ifdef __cplusplus +} +#endif diff --git a/src/amd/gmlib/gm/gm_api_funcs.c b/src/amd/gmlib/gm/gm_api_funcs.c new file mode 100755 index 00000000000..a16e4dc45b9 --- /dev/null +++ b/src/amd/gmlib/gm/gm_api_funcs.c @@ -0,0 +1,194 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : gm_api_funcs.c + * Purpose : Gamut Mapping API functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : November 12, 2024 + * Version : 3.1 + *---------------------------------------------------------------------- + * + */ + +#ifndef GM_SIM +#pragma code_seg("PAGED3PC") +#pragma data_seg("PAGED3PD") +#pragma const_seg("PAGED3PR") +#endif + +#include "gm_api_funcs.h" + +/* non library helper functions */ +/* + // SESSION START + struct s_gamut_map gamut_map; + gm_ctor(&gamut_map, gm_api_alloc, gm_api_free); // constructor - once per session + + struct s_gm_opts gm_opts; + gm_api_set_def(&gm_opts); // set default mapping + gm_api_gen_map(&gm_opts, &gamut_map); // generate default mapping + + gm_opts.ptr_3dlut_rgb = (unsigned short *)gamut_map.ptr_func_alloc( + 3 * sizeof(unsigned short) * gm_opts.num_pnts_3dlut * gm_opts.num_pnts_3dlut * gm_opts.num_pnts_3dlut); // allocate 3DLUT memory + + SOURCE OR TARGET GAMUT IS CHANGED EVENT + { + // ................... + // set parameters of src gamut, dst gamut and gamut mapping + // ................... 
+ gm_opts.update_msk = GM_UPDATE_SRC; // GM_UPDATE_SRC - + update source gamut, GM_UPDATE_DST - update destination gamut or mapping parameters has been changed + // or + gm_opts.update_msk = GM_UPDATE_DST; // GM_UPDATE_SRC - u + pdate source gamut, GM_UPDATE_DST - update destination gamut or mapping parameters has been changed + + int rc = gm_api_gen_map(&gm_opts, &gamut_map); + if (rc == 0) { + rc = gm_api_gen_3dlut(&gm_opts, &gamut_map); // generate 3DLUT +// ................. +// load 3DLUT to HW registers +// ................. + } + } + + // SESSION END + gamut_map.ptr_func_free(gm_opts.ptr_3dlut_rgb); // free 3DLUT memory + gm_dtor(&gamut_map); // destructor - once per session +*/ + +int gm_api_gen_map(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map) +{ + int rc; + + /* initialize gamut mapping staructure from api gamut options */ + if (ptr_gm_opts->update_msk & GM_UPDATE_DST) + gm_api_init(ptr_gm_opts, ptr_gamut_map); + + /* init src and dst gamuts */ + rc = gm_init_gamuts(ptr_gamut_map, &ptr_gm_opts->cs_opts_src, &ptr_gm_opts->cs_opts_dst, + ptr_gm_opts->mode, ptr_gm_opts->update_msk); + + /* generate gamut edge and other internal data */ + if (rc == 0) + gm_gen_map(ptr_gamut_map, ptr_gm_opts->update_msk); + + ptr_gm_opts->update_msk = 0; + + return rc; +} + +int gm_api_gen_3dlut(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map) +{ + if (ptr_gm_opts->ptr_3dlut_rgb) { + gm_gen_3dlut(ptr_gamut_map, ptr_gm_opts->num_pnts_3dlut, + ptr_gm_opts->bitwidth_3dlut, ptr_gm_opts->en_merge_3dlut, ptr_gm_opts->ptr_3dlut_rgb); + return 0; + } + return -1; /* something wrong */ +} + +void gm_api_set_def(struct s_gm_opts *ptr_gm_opts) +{ + int nk; + + ptr_gm_opts->gamut_map_mode = EGMM_NONE; + ptr_gm_opts->en_tm_scale_color = 1; + ptr_gm_opts->hue_rot_mode = EHRM_NONE; + ptr_gm_opts->mode = 0; + ptr_gm_opts->step_samp = 0.0005; + ptr_gm_opts->map_type = EMT_SEG; + ptr_gm_opts->num_hue_pnts = 180; + ptr_gm_opts->num_edge_pnts = 121; + 
ptr_gm_opts->num_int_pnts = 33; + ptr_gm_opts->org2_perc_c = GM_ORG2_PERC; + + for (nk = 0; nk < GM_NUM_PRIM; nk++) { + ptr_gm_opts->vec_org1_factor[nk] = gm_vec_org13_factor_def[nk][0]; + ptr_gm_opts->vec_org3_factor[nk] = gm_vec_org13_factor_def[nk][1]; + } + + ptr_gm_opts->reserve = 0; + ptr_gm_opts->show_pix_mode = ESPM_NONE; + + for (nk = 0; nk < 2; nk++) + ptr_gm_opts->show_pix_hue_limits[nk] = 0.0; + + cs_set_opts_def(&ptr_gm_opts->cs_opts_src); + cs_set_opts_def(&ptr_gm_opts->cs_opts_dst); + + ptr_gm_opts->update_msk = GM_UPDATE_SRC | GM_UPDATE_DST; + + ptr_gm_opts->en_merge_3dlut = 0; + ptr_gm_opts->num_pnts_3dlut = 17; + ptr_gm_opts->bitwidth_3dlut = 12; +} + +void gm_api_init(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map) +{ + int nk; + + gm_set_def(ptr_gamut_map); + + ptr_gamut_map->gamut_map_mode = ptr_gm_opts->gamut_map_mode; + ptr_gamut_map->en_tm_scale_color = ptr_gm_opts->en_tm_scale_color; + ptr_gamut_map->hue_rot_mode = ptr_gm_opts->hue_rot_mode; + ptr_gamut_map->mode = ptr_gm_opts->mode; + ptr_gamut_map->org2_perc_c = ptr_gm_opts->org2_perc_c; + + for (nk = 0; nk < GM_NUM_PRIM; nk++) { + /* Factor of Origin1 for M,R,Y,G,C,B = 1.3, 1.3, 1.3, 1.3, 1.2, 1.0 */ + ptr_gamut_map->vec_org1_factor[nk] = ptr_gm_opts->vec_org1_factor[nk]; + /* Factor of Origin3 for M,R,Y,G,C,B = 1.05, 1.1, 1.1, 1.05, 1.01, 1.06 */ + ptr_gamut_map->vec_org3_factor[nk] = ptr_gm_opts->vec_org3_factor[nk]; + } + + ptr_gamut_map->step_samp = ptr_gm_opts->step_samp; /* default is 0.0005 */ + ptr_gamut_map->map_type = ptr_gm_opts->map_type; /* default is EMT_SEG */ + ptr_gamut_map->num_hue_pnts = ptr_gm_opts->num_hue_pnts; /* default is 181 */ + ptr_gamut_map->num_edge_pnts = ptr_gm_opts->num_edge_pnts; /* default is 121 */ + ptr_gamut_map->num_int_pnts = ptr_gm_opts->num_int_pnts; /* default is 33 */ + + ptr_gamut_map->reserve = ptr_gm_opts->reserve; + ptr_gamut_map->show_pix_mode = ptr_gm_opts->show_pix_mode; + + for (nk = 0; nk < 2; nk++) + 
ptr_gamut_map->show_pix_hue_limits[nk] = ptr_gm_opts->show_pix_hue_limits[nk]; +} + +#ifndef GM_SIM +#ifndef LINUX_DM +#include "dm_services.h" +#else +/* TBD: include for LINUX_DM */ +#endif /* LINUX_DM */ +#else +#include +#endif /* GM_SIM */ + +void *gm_api_alloc(unsigned int size_bytes, void* mem_ctx) +{ +#ifndef GM_SIM +#ifndef LINUX_DM + return dm_alloc(size_bytes); +#else + /* TBD: alloc() for LINUX_DM */ +#endif /* LINUX_DM */ +#else + return malloc(size_bytes); +#endif /* GM_SIM */ +} + +void gm_api_free(void *ptr_mem, void* mem_ctx) +{ +#ifndef GM_SIM +#ifndef LINUX_DM + dm_free(ptr_mem); +#else + /* TBD: free() for LINUX_DM */ +#endif /* LINUX_DM */ +#else + free(ptr_mem); +#endif /* GM_SIM */ +} diff --git a/src/amd/gmlib/gm/gm_api_funcs.h b/src/amd/gmlib/gm/gm_api_funcs.h new file mode 100755 index 00000000000..902d4ba8382 --- /dev/null +++ b/src/amd/gmlib/gm/gm_api_funcs.h @@ -0,0 +1,79 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : gm_api_funcs.h + * Purpose : Gamut Mapping API functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : November 12, 2024 + * Version : 3.1 + *---------------------------------------------------------------------- + * +*/ + +#pragma once + +#include "gm_funcs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct s_gm_opts { + enum gm_gamut_map_mode gamut_map_mode; + /* Gamut Map Mode: 0 - no gamut map, 1 - Tone Map BT2390-4, 2 - TM+CHTO, 3 - TM+CHSO, 4 - TM+CHCI */ + enum gm_hue_rot_mode hue_rot_mode; + /* Hue Rotation Mode: 0 - none, 1 - hue rotation, 2 - chroma compression, 3 - hue rotation and chroma compression */ + int en_tm_scale_color; + /* Enable/Disable Color Scaling (valid for Tone Mapping mode only): {0,1} = 1 */ + unsigned int mode; + /* mode = 0 : Reserved for modifications of the Gamut Map algo */ + /* CHTO tuning parameters */ + MATFLOAT org2_perc_c; + /* 
#pragma once

#include "gm_funcs.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Options consumed by the Gamut Mapping API.
 * Fill with gm_api_set_def(), adjust as needed, then pass to
 * gm_api_gen_map() and gm_api_gen_3dlut().
 */
struct s_gm_opts {
	enum gm_gamut_map_mode	gamut_map_mode;
		/* Gamut Map Mode: 0 - no gamut map, 1 - Tone Map BT2390-4, 2 - TM+CHTO, 3 - TM+CHSO, 4 - TM+CHCI */
	enum gm_hue_rot_mode	hue_rot_mode;
		/* Hue Rotation Mode: 0 - none, 1 - hue rotation, 2 - chroma compression, 3 - hue rotation and chroma compression */
	int						en_tm_scale_color;
		/* Enable/Disable Color Scaling (valid for Tone Mapping mode only): {0,1} = 1 */
	unsigned int			mode;
		/* mode = 0 : Reserved for modifications of the Gamut Map algo */
	/* CHTO tuning parameters */
	MATFLOAT				org2_perc_c;
		/* Origin2 percentage gap for chroma [0.7,0.95] = 0.9 */
	MATFLOAT				vec_org1_factor[GM_NUM_PRIM];
		/* Factor of Origin1 for M,R,Y,G,C,B [1.0,1.4] = 1.3, 1.3, 1.3, 1.3, 1.2, 1.0 */
	MATFLOAT				vec_org3_factor[GM_NUM_PRIM];
		/* Factor of Origin3 for M,R,Y,G,C,B [1.01,1.2] = 1.05, 1.2, 1.05, 1.05, 1.01, 1.05 */
	MATFLOAT				step_samp;
		/* Sampling precision in IC space for edge search [0.00001,0.001]; gm_api_set_def() assigns 0.0005 */
	enum gm_map_type		map_type;
		/* Map type: {0,1,2} = 0 : 0 - segments intersection SEG, 1 - radius sampling RAD, 2 hybrid - SEG+RAD */
	int						num_hue_pnts;
		/* Number of hue grid points: [90,360]; gm_api_set_def() assigns 180 */
	int						num_edge_pnts;
		/* Number of edge IC grid points: [91, 181]; gm_api_set_def() assigns 121 */
	int						num_int_pnts;
		/* Number of intensity grid points for primary hues: [5,33] = 33 */
	/* show pixel parameters */
	int						reserve;
		/* Reserved for debugging purpose = 0 */
	enum gm_show_pix_mode	show_pix_mode;
		/* EShowPixMode: [0,8]=0 : show pixel debugging mode */
	MATFLOAT				show_pix_hue_limits[2];
		/* Show Pixel mode hue ranges */
	/* color space parameters */
	struct s_cs_opts		cs_opts_src;
	struct s_cs_opts		cs_opts_dst;
	int						update_msk;
		/* Update mask: GM_UPDATE_SRC - update source gamut, GM_UPDATE_DST - update destination gamut */
		/* (cleared by gm_api_gen_map() on return) */
	/* 3DLUT parameters */
	int						en_merge_3dlut;		/* passed to gm_gen_3dlut(); gm_api_set_def() assigns 0 */
	int						num_pnts_3dlut;		/* grid points per axis; gm_api_set_def() assigns 17 */
	int						bitwidth_3dlut;		/* bits per component; gm_api_set_def() assigns 12 */
	unsigned short			*ptr_3dlut_rgb;		/* caller-owned LUT buffer; gm_api_gen_3dlut() returns -1 when it is 0 */
};

/* Regenerate the gamut map according to update_msk; returns gm_init_gamuts() result. */
int gm_api_gen_map(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map);
/* Generate the 3DLUT into ptr_3dlut_rgb; returns 0, or -1 when no buffer is attached. */
int gm_api_gen_3dlut(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map);

/* Load default option values. */
void gm_api_set_def(struct s_gm_opts *ptr_gm_opts);
/* Copy mapping parameters from the options into the gamut map. */
void gm_api_init(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map);

void *gm_api_alloc(unsigned int size_bytes, void* mem_ctx); /* alloc array */
void gm_api_free(void *ptr_mem, void* mem_ctx); /* free array */

#ifdef __cplusplus
}
#endif
/dev/null +++ b/src/amd/gmlib/gm/gm_funcs.c @@ -0,0 +1,1492 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : gm_funcs.c + * Purpose : Gamut Mapping functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : November 12, 2024 + * Version : 3.1 + *--------------------------------------------------------------------- + * + */ + +#ifndef GM_SIM +#pragma code_seg("PAGED3PC") +#pragma data_seg("PAGED3PD") +#pragma const_seg("PAGED3PR") +#endif + +#include "gm_funcs.h" + +static float gm_lin2pq[GM_PQTAB_NUMPNTS]; +static float gm_pq2lin[GM_PQTAB_NUMPNTS]; + +void gm_ctor(struct s_gamut_map *ptr_gamut_map, void*(*ptr_func_alloc)(unsigned int, void*), void(*ptr_func_free)(void *, void*), void* mem_context) +{ + ptr_gamut_map->ptr_func_alloc = ptr_func_alloc; + ptr_gamut_map->ptr_func_free = ptr_func_free; + ptr_gamut_map->memory_context = mem_context; + ptr_gamut_map->ptr_edge_ic = 0; + ptr_gamut_map->ptr_hr_src_hc = 0; + ptr_gamut_map->ptr_hr_dst_hc = 0; + ptr_gamut_map->ptr_org2_ic = 0; + ptr_gamut_map->ptr_org3_ic = 0; + ptr_gamut_map->ptr_cusp_src_ic = 0; + ptr_gamut_map->ptr_cusp_dst_ic = 0; + + gm_gen_pq_lut(gm_lin2pq, GM_PQTAB_NUMPNTS, EGD_LIN_2_NONLIN); + gm_gen_pq_lut(gm_pq2lin, GM_PQTAB_NUMPNTS, EGD_NONLIN_2_LIN); + gm_set_def(ptr_gamut_map); +} + +void gm_dtor(struct s_gamut_map *ptr_gamut_map) +{ + gm_free_mem(ptr_gamut_map); + + ptr_gamut_map->ptr_func_alloc = 0; + ptr_gamut_map->ptr_func_free = 0; +} + +void gm_alloc_mem(struct s_gamut_map *ptr_gamut_map) +{ + if (ptr_gamut_map->gamut_map_mode > EGMM_TM) { + if (ptr_gamut_map->map_type != EMT_RAD) + if (ptr_gamut_map->ptr_edge_ic == 0) + ptr_gamut_map->ptr_edge_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc( + ptr_gamut_map->num_hue_pnts * ptr_gamut_map->num_edge_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + + if (ptr_gamut_map->ptr_org2_ic == 0) + 
ptr_gamut_map->ptr_org2_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc( + ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + + if (ptr_gamut_map->ptr_org3_ic == 0) + ptr_gamut_map->ptr_org3_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc( + ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + } + + if (ptr_gamut_map->hue_rot_mode != EHRM_NONE) { + if (ptr_gamut_map->ptr_hr_src_hc == 0) + ptr_gamut_map->ptr_hr_src_hc = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc( + GM_NUM_PRIM * ptr_gamut_map->num_int_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + + if (ptr_gamut_map->ptr_hr_dst_hc == 0) + ptr_gamut_map->ptr_hr_dst_hc = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc( + GM_NUM_PRIM * ptr_gamut_map->num_int_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + + } + + if (ptr_gamut_map->ptr_cusp_src_ic == 0) + ptr_gamut_map->ptr_cusp_src_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc( + ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + + if (ptr_gamut_map->ptr_cusp_dst_ic == 0) + ptr_gamut_map->ptr_cusp_dst_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc( + ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); +} + +void gm_free_mem(struct s_gamut_map *ptr_gamut_map) +{ + if (ptr_gamut_map->ptr_edge_ic) { + ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->memory_context); + ptr_gamut_map->ptr_edge_ic = 0; + } + + if (ptr_gamut_map->ptr_hr_src_hc) { + ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_hr_src_hc, ptr_gamut_map->memory_context); + ptr_gamut_map->ptr_hr_src_hc = 0; + } + + if (ptr_gamut_map->ptr_hr_dst_hc) { + ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_hr_dst_hc, ptr_gamut_map->memory_context); + ptr_gamut_map->ptr_hr_dst_hc = 0; + } + + if (ptr_gamut_map->ptr_org2_ic) { + ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_org2_ic, ptr_gamut_map->memory_context); + 
ptr_gamut_map->ptr_org2_ic = 0; + } + + if (ptr_gamut_map->ptr_org3_ic) { + ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_org3_ic, ptr_gamut_map->memory_context); + ptr_gamut_map->ptr_org3_ic = 0; + } + + if (ptr_gamut_map->ptr_cusp_src_ic) { + ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_cusp_src_ic, ptr_gamut_map->memory_context); + ptr_gamut_map->ptr_cusp_src_ic = 0; + } + + if (ptr_gamut_map->ptr_cusp_dst_ic) { + ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_cusp_dst_ic, ptr_gamut_map->memory_context); + ptr_gamut_map->ptr_cusp_dst_ic = 0; + } +} + +void gm_set_def(struct s_gamut_map *ptr_gamut_map) +{ + int nk; + + ptr_gamut_map->gamut_map_mode = EGMM_NONE; + ptr_gamut_map->en_tm_scale_color = 1; + ptr_gamut_map->hue_rot_mode = EHRM_NONE; + ptr_gamut_map->mode = 0; + ptr_gamut_map->num_hue_pnts = GM_NUM_HUE; + ptr_gamut_map->num_edge_pnts = GM_NUM_EDGE; + ptr_gamut_map->num_int_pnts = GM_NUM_INT; + ptr_gamut_map->step_samp = GM_STEP_SAMP; + ptr_gamut_map->edge_type = EET_RAD; + ptr_gamut_map->map_type = EMT_SEG; + ptr_gamut_map->org2_perc_c = GM_ORG2_PERC; + for (nk = 0; nk < GM_NUM_PRIM; nk++) { + ptr_gamut_map->vec_org1_factor[nk] = gm_vec_org13_factor_def[nk][0]; + ptr_gamut_map->vec_org3_factor[nk] = gm_vec_org13_factor_def[nk][1]; + } + ptr_gamut_map->reserve = 0; + ptr_gamut_map->show_pix_mode = ESPM_NONE; + for (nk = 0; nk < 2; nk++) + ptr_gamut_map->show_pix_hue_limits[nk] = 0.0; +} + +int gm_init_gamuts(struct s_gamut_map *ptr_gamut_map, struct s_cs_opts *ptr_cs_opts_src, + struct s_cs_opts *ptr_cs_opts_dst, unsigned int gm_mode, int update_msk) +{ + if (update_msk & GM_UPDATE_SRC) { /* init and generate prim and cusp points for source gamut */ + cs_init(ptr_cs_opts_src, &ptr_gamut_map->color_space_src); + cs_genprim_itp(&ptr_gamut_map->color_space_src, GM_NUM_PRIM, (MATFLOAT *)gm_vec_cusp_rgb, + ptr_gamut_map->vec_prim_src_ich); + } + + if (update_msk & GM_UPDATE_DST) { /* init and generate prim and cusp points for target gamut */ + 
cs_init(ptr_cs_opts_dst, &ptr_gamut_map->color_space_dst); + cs_genprim_itp(&ptr_gamut_map->color_space_dst, GM_NUM_PRIM, (MATFLOAT *)gm_vec_cusp_rgb, + ptr_gamut_map->vec_prim_dst_ich); + } + + /* calculate Luma Min/Max for Tone Mapping */ + if ((update_msk & GM_UPDATE_SRC) || (update_msk & GM_UPDATE_DST)) { + MATFLOAT luma_rng_src = ptr_gamut_map->color_space_src.luma_limits[1] - + ptr_gamut_map->color_space_src.luma_limits[0]; + ptr_gamut_map->lum_min = (ptr_gamut_map->color_space_dst.luma_limits[0] - + ptr_gamut_map->color_space_src.luma_limits[0]) / luma_rng_src; + ptr_gamut_map->lum_max = (ptr_gamut_map->color_space_dst.luma_limits[1] - + ptr_gamut_map->color_space_src.luma_limits[0]) / luma_rng_src; + } + + if (update_msk & GM_UPDATE_DST) { + gm_free_mem(ptr_gamut_map); + gm_alloc_mem(ptr_gamut_map); + } + + if (ptr_gamut_map->hue_rot_mode != EHRM_NONE) { /* generate prim for intensity points */ + /* memory for src cusp points is reallocated if GM_UPDATE_DST */ + if ((update_msk & GM_UPDATE_SRC) || (update_msk & GM_UPDATE_DST)) + gm_genprim_hc(&ptr_gamut_map->color_space_src, ptr_gamut_map->ptr_hr_src_hc, + ptr_gamut_map->num_int_pnts, ptr_gamut_map->color_space_dst.luma_limits, + ptr_gamut_map->lum_min, ptr_gamut_map->lum_max); + if (update_msk & GM_UPDATE_DST) + gm_genprim_hc(&ptr_gamut_map->color_space_dst, ptr_gamut_map->ptr_hr_dst_hc, + ptr_gamut_map->num_int_pnts, ptr_gamut_map->color_space_dst.luma_limits, 0.0, 1.0); /* no TM */ + } + + /* memory for src cusp points is reallocated if GM_UPDATE_DST */ + if ((update_msk & GM_UPDATE_SRC) || (update_msk & GM_UPDATE_DST)) + gm_gencusp_ic(ptr_gamut_map, 0); /* generate cusp points for source gamut */ + + if (update_msk & GM_UPDATE_DST) + gm_gencusp_ic(ptr_gamut_map, 1); /* generate cusp points for target gamut */ + + ptr_gamut_map->mode = gm_mode; + ptr_gamut_map->hue_max = 2.0 * mat_get_pi() * (1.0 - 1.0 / (MATFLOAT)ptr_gamut_map->num_hue_pnts); + ptr_gamut_map->org1 = mat_denorm(GM_ORG1_FACTOR, 
ptr_gamut_map->color_space_dst.luma_limits[0], + ptr_gamut_map->color_space_dst.luma_limits[2]); + ptr_gamut_map->org3 = mat_denorm(GM_ORG3_FACTOR, ptr_gamut_map->color_space_dst.luma_limits[0], + ptr_gamut_map->color_space_dst.luma_limits[2]); + + return 0; +} + +int gm_check_gamut(struct s_gamut_map *ptr_gamut_map) +{ + struct s_color_space* ptr_cs_src = &ptr_gamut_map->color_space_src; + struct s_color_space* ptr_cs_dst = &ptr_gamut_map->color_space_dst; + + if (ptr_gamut_map->gamut_map_mode != EGMM_NONE) + if ((ptr_cs_src->luminance_limits[0] > ptr_cs_dst->luminance_limits[0]) || + (ptr_cs_src->luminance_limits[1] < ptr_cs_dst->luminance_limits[1])) { + ptr_gamut_map->gamut_map_mode = EGMM_NONE; + ptr_gamut_map->hue_rot_mode = EHRM_NONE; + return -1; /* non valid luminance limits */ + } + + return 0; /* valid parameters */ +} + +void gm_gencusp_ic(struct s_gamut_map *ptr_gamut_map, int color_space) +{ + struct s_color_space *ptr_color_space = color_space ? &ptr_gamut_map->color_space_dst : &ptr_gamut_map->color_space_src; + MATFLOAT *ptr_cusp_ic = color_space ? 
ptr_gamut_map->ptr_cusp_dst_ic : ptr_gamut_map->ptr_cusp_src_ic; + int num_phases = ptr_gamut_map->num_hue_pnts / GM_NUM_PRIM; + int index = 0; + MATFLOAT *ptr_hue = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(ptr_gamut_map->num_hue_pnts * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + MATFLOAT *ptr_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + MATFLOAT rgb[3], itp[3]; + int np, ni, nc; + + for (np = 0; np < GM_NUM_PRIM; np++) { + for (ni = 0; ni < num_phases; ni++) { + MATFLOAT phase = (MATFLOAT)ni / (MATFLOAT)num_phases; + + int ind0 = np; + int ind1 = (ind0 + 1) % GM_NUM_PRIM; + for (nc = 0; nc < 3; nc++) { + MATFLOAT val0 = gm_vec_cusp_rgb[ind0][nc]; + MATFLOAT val1 = gm_vec_cusp_rgb[ind1][nc]; + + rgb[nc] = val0 + (val1 - val0) * phase; + } + cs_gamma_rgb(rgb, rgb, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN); /* TBD */ + cs_denorm_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]); + cs_clamp_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[1]); + cs_rgb_to_itp(ptr_color_space, rgb, itp); + + if (color_space == 0) { /* tm and hr for source gamut */ + if (ptr_gamut_map->gamut_map_mode != EGMM_NONE) { + if ((ptr_gamut_map->lum_min > 0.0) || (ptr_gamut_map->lum_max < 1.0)) + itp[0] = gm_tm_luma(itp[0], ptr_gamut_map->color_space_src.luma_limits, + ptr_gamut_map->lum_min, ptr_gamut_map->lum_max); + if (ptr_gamut_map->hue_rot_mode != EHRM_NONE) + gm_hr_itp(ptr_gamut_map, itp, itp, 0); + } + } + + ptr_ic[2 * index + 0] = itp[0]; + ptr_ic[2 * index + 1] = mat_radius(itp[2], itp[1]); + ptr_hue[index] = mat_angle(itp[2], itp[1]); + index++; + } + } + + gm_resample_hue_ic(ptr_hue, ptr_ic, ptr_cusp_ic, ptr_gamut_map->num_hue_pnts, ptr_gamut_map->num_hue_pnts); + + ptr_gamut_map->ptr_func_free(ptr_ic, ptr_gamut_map->memory_context); + ptr_gamut_map->ptr_func_free(ptr_hue, ptr_gamut_map->memory_context); +} + 
+void gm_gen_edge_hue(struct s_gamut_map *ptr_gamut_map, int hue_ind) +{ + MATFLOAT fHue = mat_index_to_flt(hue_ind, ptr_gamut_map->hue_max, ptr_gamut_map->num_hue_pnts); + + gm_genedge(&ptr_gamut_map->color_space_dst, ptr_gamut_map->color_space_dst.luma_limits, + ptr_gamut_map->num_edge_pnts, ptr_gamut_map->edge_type, ptr_gamut_map->step_samp, fHue, + &ptr_gamut_map->ptr_edge_ic[hue_ind * ptr_gamut_map->num_edge_pnts * 2], + ptr_gamut_map->mode & GM_PQTAB_GBD); + + /* correct edge for target cusp point - optional */ + if (ptr_gamut_map->mode & GM_CUSP_ADJUST) + gm_edgecusp_adjust(&ptr_gamut_map->ptr_edge_ic[hue_ind * ptr_gamut_map->num_edge_pnts * 2], + ptr_gamut_map->num_edge_pnts, &ptr_gamut_map->ptr_cusp_dst_ic[hue_ind * 2]); +} + +/* resample to uniform hue */ +void gm_resample_hue_ic(MATFLOAT *ptr_hue, MATFLOAT *ptr_ic_inp, MATFLOAT *ptr_ic_out, int num_hue_pnts_inp, int num_hue_pnts_out) +{ + const MATFLOAT gm_2pi = 2.0 * mat_get_pi(); + int index_2pi = mat_get_hue_index_2pi(ptr_hue, num_hue_pnts_inp); + int ind1 = index_2pi; + int ind0 = (ind1 > 0) ? 
ind1 - 1 : num_hue_pnts_inp - 1; + MATFLOAT tar_inc_out = gm_2pi / (MATFLOAT)num_hue_pnts_out; + MATFLOAT tar_acc_out = 0.0; + MATFLOAT tar_inc_inp = ptr_hue[ind1] - ptr_hue[ind0]; + int ni; + + if (tar_inc_inp < 0.0) + tar_inc_inp += gm_2pi; + + for (ni = 0; ni < num_hue_pnts_out; ni++) { + MATFLOAT hue = ptr_hue[ind1]; + MATFLOAT delta_src, phs_src; + + if ((ind1 == index_2pi) && (ni > num_hue_pnts_out / 2)) + hue += gm_2pi; + + while (tar_acc_out >= hue) { + ind0 = (ind0 + 1) % num_hue_pnts_inp; + ind1 = (ind1 + 1) % num_hue_pnts_inp; + hue = ptr_hue[ind1]; + if ((ind1 == index_2pi) && (ni > num_hue_pnts_out / 2)) { + hue += gm_2pi; + } + tar_inc_inp = ptr_hue[ind1] - ptr_hue[ind0]; + + if (tar_inc_inp < 0.0) + tar_inc_inp += gm_2pi; + } + delta_src = tar_acc_out - ptr_hue[ind0]; + if (delta_src < 0.0) + delta_src += gm_2pi; + phs_src = delta_src / tar_inc_inp; + + ptr_ic_out[2 * ni + 0] = ptr_ic_inp[2 * ind0 + 0] + (ptr_ic_inp[2 * ind1 + 0] - ptr_ic_inp[2 * ind0 + 0]) * phs_src; + ptr_ic_out[2 * ni + 1] = ptr_ic_inp[2 * ind0 + 1] + (ptr_ic_inp[2 * ind1 + 1] - ptr_ic_inp[2 * ind0 + 1]) * phs_src; + + tar_acc_out += tar_inc_out; + } +} + +/* calculate hue for primary colors for normilized uniform intensity */ +void gm_genprim_hc(struct s_color_space *ptr_color_space, MATFLOAT *ptr_hr_hc, int num_int_pnts, + MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max) +{ + MATFLOAT step = 1.0 / (MATFLOAT)(num_int_pnts - 1); + MATFLOAT vec_prim_ich[GM_NUM_INT][3]; + MATFLOAT prim_rgb[3], rgb[3], itp_src[3]; + int nk, ni, nc; + + for (nk = 0; nk < GM_NUM_PRIM; nk++) { + mat_copy((MATFLOAT *)gm_vec_cusp_rgb[nk], prim_rgb, 3); + for (ni = 0; ni < num_int_pnts; ni++) { + for (nc = 0; nc < 3; nc++) + rgb[nc] = prim_rgb[nc] * (MATFLOAT)ni * step; + /* generate gamut prim points */ + cs_gamma_rgb(rgb, rgb, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN); + cs_denorm_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]); + cs_clamp_rgb(rgb, 
ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[1]); + cs_rgb_to_itp(ptr_color_space, rgb, itp_src); + if ((lum_min > 0.0) || (lum_max < 1.0)) + itp_src[0] = gm_tm_luma(itp_src[0], ptr_color_space->luma_limits, lum_min, lum_max); + cs_itp_to_ich(itp_src, vec_prim_ich[ni]); + vec_prim_ich[ni][0] = mat_norm(vec_prim_ich[ni][0], luma_limits[0], luma_limits[2]); + /* normilize to [0.0,1.0] from target luma limits */ + vec_prim_ich[ni][0] = MAT_CLAMP(vec_prim_ich[ni][0], 0.0, 1.0); + } + /* update Intensity=0.0 point */ + vec_prim_ich[0][0] = 0.0; + vec_prim_ich[0][1] = 0.0; + vec_prim_ich[0][2] = vec_prim_ich[1][2]; + /* update Intensity=1.0 point */ + vec_prim_ich[num_int_pnts - 1][0] = 1.0; + vec_prim_ich[num_int_pnts - 1][1] = 0.0; + vec_prim_ich[num_int_pnts - 1][2] = vec_prim_ich[num_int_pnts - 2][2]; + /* resample to uniform intensity */ + gm_resample_hc(vec_prim_ich, &ptr_hr_hc[nk * num_int_pnts * 2], num_int_pnts, num_int_pnts); + } +} + +/* calculate origin1 and origin1 factor */ +void gm_genorg13_factor(struct s_gamut_map* ptr_gamut_map, MATFLOAT* ptr_org13_factor) +{ + MATFLOAT vec_org13_factor_prim[GM_NUM_PRIM * 2]; + int ni; + + for (ni = 0; ni < GM_NUM_PRIM; ni++) { + vec_org13_factor_prim[2 * ni + 0] = ptr_gamut_map->vec_org1_factor[ni]; + vec_org13_factor_prim[2 * ni + 1] = ptr_gamut_map->vec_org3_factor[ni]; + } + gm_resample_hue_ic(&ptr_gamut_map->vec_prim_dst_ich[2 * GM_NUM_PRIM], vec_org13_factor_prim, + ptr_org13_factor, GM_NUM_PRIM, ptr_gamut_map->num_hue_pnts); +} + +void gm_genorigin23_hue(struct s_gamut_map* ptr_gamut_map, MATFLOAT* ptr_org13_factor, int hue_ind) +{ + MATFLOAT hue = mat_index_to_flt(hue_ind, ptr_gamut_map->hue_max, ptr_gamut_map->num_hue_pnts); + MATFLOAT cusp_ich_src[3], cusp_ich_dst[3]; + MATFLOAT org_13[2]; + + cusp_ich_src[0] = ptr_gamut_map->ptr_cusp_src_ic[2 * hue_ind + 0]; + cusp_ich_src[1] = ptr_gamut_map->ptr_cusp_src_ic[2 * hue_ind + 1]; + cusp_ich_src[2] = hue; + + cusp_ich_dst[0] = 
ptr_gamut_map->ptr_cusp_dst_ic[2 * hue_ind + 0]; + cusp_ich_dst[1] = ptr_gamut_map->ptr_cusp_dst_ic[2 * hue_ind + 1]; + cusp_ich_dst[2] = hue; + + /* get Org1 */ + org_13[0] = (ptr_org13_factor[2 * hue_ind + 0] >= 1.0) ? + ptr_gamut_map->org1 * ptr_org13_factor[2 * hue_ind + 0] : + ptr_gamut_map->org1 + (cusp_ich_dst[0] - ptr_gamut_map->org1) * ptr_org13_factor[2 * hue_ind + 0]; + org_13[0] = MAT_CLAMP(org_13[0], ptr_gamut_map->org1, cusp_ich_dst[0]); + /* get Org3 */ + org_13[1] = ptr_gamut_map->org3 * ptr_org13_factor[2 * hue_ind + 1]; + /* calculate Origin2 and Origin3 */ + gm_getorigin23(&ptr_gamut_map->color_space_src, &ptr_gamut_map->color_space_dst, hue, org_13, ptr_gamut_map->org2_perc_c, + cusp_ich_src, cusp_ich_dst, &ptr_gamut_map->ptr_org2_ic[2 * hue_ind], &ptr_gamut_map->ptr_org3_ic[2 * hue_ind], + ptr_gamut_map->mode & GM_PQTAB_GBD); +} + +void gm_getorigin23(struct s_color_space *ptr_color_space_src, struct s_color_space *ptr_color_space_dst, + MATFLOAT hue, MATFLOAT org_13_factor[2], MATFLOAT org2_perc_c, + MATFLOAT cusp_ic_src[2], MATFLOAT cusp_ic_dst[2], + MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int en_pq_lut) +{ + + if ((cusp_ic_src[0] <= cusp_ic_dst[0]) || (cusp_ic_src[1] <= cusp_ic_dst[1])) { + origin2_ic[0] = org_13_factor[0]; + origin2_ic[1] = 0.0; + origin3_ic[0] = org_13_factor[1]; + origin3_ic[1] = (origin3_ic[0] - origin2_ic[0]) * cusp_ic_dst[1] / (cusp_ic_dst[0] - origin2_ic[0]); + return; + } + + MATFLOAT slope = (cusp_ic_src[0] - cusp_ic_dst[0]) / (cusp_ic_src[1] - cusp_ic_dst[1]); + MATFLOAT offset = cusp_ic_dst[0] - slope * cusp_ic_dst[1]; + + /* get Origin2 point */ + origin2_ic[0] = org_13_factor[0]; + origin2_ic[1] = (origin2_ic[0] - offset) / slope; + if (origin2_ic[1] < 0.0) { + origin2_ic[0] = origin2_ic[0] - origin2_ic[1] * slope; + origin2_ic[1] = 0.0; + } else { + MATFLOAT ic_tmp[2]; + MATFLOAT ic_dst[2] = { origin2_ic[0], origin2_ic[1] }; + MATFLOAT ic_src[2] = { origin2_ic[0], origin2_ic[1] }; + MATFLOAT inc_ic[2] = 
{ 0.0, GM_STEP_SAMP * 10.0 }; + MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) }; + + gm_sample_edge_ic(ptr_color_space_dst, hue_sin_cos, inc_ic, ic_dst, en_pq_lut); + gm_sample_edge_ic(ptr_color_space_src, hue_sin_cos, inc_ic, ic_src, en_pq_lut); + if (ic_src[1] < ic_dst[1]) { + ic_tmp[0] = ic_src[0]; + ic_tmp[1] = ic_src[1]; + } else { + ic_tmp[0] = ic_dst[0]; + ic_tmp[1] = ic_dst[1]; + } + if (origin2_ic[1] > org2_perc_c * ic_tmp[1]) { + origin2_ic[1] = org2_perc_c * ic_tmp[1]; + slope = (cusp_ic_src[0] - origin2_ic[0]) / (cusp_ic_src[1] - origin2_ic[1]); + offset = origin2_ic[0] - slope * origin2_ic[1]; + } + } + /* get Origin3 point */ + origin3_ic[0] = org_13_factor[1]; + origin3_ic[1] = (origin3_ic[0] - offset) / slope; +} + +/* resmapling for uniform normilized Intensity in a range [0.0,1.0] */ +void gm_resample_hc(MATFLOAT vec_ich_inp[][3], MATFLOAT *ptr_hc_out, int num_int_pnts_inp, int num_int_pnts_out) +{ + MATFLOAT tar_inc_out = 1.0 / (MATFLOAT)(num_int_pnts_out - 1); + MATFLOAT tar_inc_inp = vec_ich_inp[1][0] - vec_ich_inp[0][0]; + MATFLOAT tar_acc_out = 0.0; + MATFLOAT phs_inp; + int ind0 = 0; + int ind1 = 1; + int ni; + + for (ni = 0; ni < num_int_pnts_out; ni++) { + while ((tar_acc_out >= vec_ich_inp[ind1][0]) && (ind1 > ind0)) { + ind0 = MAT_MIN(ind0 + 1, num_int_pnts_inp - 1); + ind1 = MAT_MIN(ind1 + 1, num_int_pnts_inp - 1); + tar_inc_inp = vec_ich_inp[ind1][0] - vec_ich_inp[ind0][0]; + } + phs_inp = (tar_inc_inp == 0.0) ? 
0.0 : (tar_acc_out - vec_ich_inp[ind0][0]) / tar_inc_inp; + ptr_hc_out[ni * 2 + 0] = vec_ich_inp[ind0][2] + (vec_ich_inp[ind1][2] - vec_ich_inp[ind0][2]) * phs_inp; + ptr_hc_out[ni * 2 + 1] = vec_ich_inp[ind0][1] + (vec_ich_inp[ind1][1] - vec_ich_inp[ind0][1]) * phs_inp; + tar_acc_out += tar_inc_out; + } +} + +int gm_rgb_to_rgb(struct s_gamut_map* ptr_gamut_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]) +{ /* rgb_inp - linear space, linear space */ + MATFLOAT itp_inp[3], itp_out[3]; + int zone = 0; + + if (ptr_gamut_map->gamut_map_mode != EGMM_NONE) { + gm_rgb_to_itp(&ptr_gamut_map->color_space_src, rgb_inp, itp_inp, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + zone = gm_map_itp(ptr_gamut_map, itp_inp, itp_out); + gm_itp_to_rgb(&ptr_gamut_map->color_space_dst, itp_out, rgb_out, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + } + else + mat_copy(rgb_inp, rgb_out, 3); + + return zone; +} + +/* input and output lumas are in a range [luma_limits[0], luma_limits[1]] */ +MATFLOAT gm_tm_itp(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], MATFLOAT luma_limits[3], + MATFLOAT lum_min, MATFLOAT lum_max, int en_tm_scale_color, int en_tm_scale_luma) +{ + MATFLOAT color_scale = 1.0; + MATFLOAT luma_inp = itp_inp[0]; + + if (en_tm_scale_luma) /* LUMA scaling */ + itp_out[0] = gm_scale_luma(luma_inp, luma_limits, lum_min, lum_max); + else /* LUMA correction as in BT.2390 */ + itp_out[0] = gm_tm_luma(luma_inp, luma_limits, lum_min, lum_max); + + /* CHROMA correction as in BT.2390 */ + if (en_tm_scale_color && (itp_out[0] != luma_inp)) { + color_scale = (itp_out[0] < luma_inp) ? 
itp_out[0] / luma_inp : luma_inp / itp_out[0]; + itp_out[1] = itp_inp[1] * color_scale; + itp_out[2] = itp_inp[2] * color_scale; + } + else { + itp_out[1] = itp_inp[1]; + itp_out[2] = itp_inp[2]; + } + + return color_scale; +} + +/* input and output lumas are in a range [luma_limits[0], luma_limits[1]] */ +MATFLOAT gm_tm_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max) +{ + const MATFLOAT cfEpsilon = 0.000001; + MATFLOAT ks = (1.5 * lum_max) - 0.5; + MATFLOAT b = lum_min; + MATFLOAT e0, e1, e2, e3, e4; + + /* Input luma must be normilized to [0.0,1.0] */ + e0 = luma; + e1 = mat_norm(e0, luma_limits[0], luma_limits[2]); + e1 = mat_clamp(e1, 0.0, 1.0); + + if (e1 < ks) { + e2 = e1; + } else { + MATFLOAT t = ((1.0 - ks) <= cfEpsilon) ? (e1 - ks) : ((e1 - ks) / (1.0 - ks)); + MATFLOAT t2 = t * t; + MATFLOAT t3 = t2 * t; + + e2 = (((2.0 * t3) - (3.0 * t2) + 1.0) * ks) + ((t3 - (2.0 * t2) + t) * (1.0 - ks)) + (((-2.0 * t3) + (3.0 * t2)) * lum_max); + } + e3 = e2 + b * mat_pow((1.0 - e2), 4.0); + + /* Output luma must be denormilized back to [i_afLumaLim[0], i_afLumaLim[1]] */ + e4 = mat_denorm(e3, luma_limits[0], luma_limits[2]); + e4 = mat_clamp(e4, luma_limits[0], luma_limits[1]); + + return e4; +} + +/* input and output lumas are in a range [luma_limits[0], luma_limits[1]] */ +MATFLOAT gm_scale_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max) +{ + MATFLOAT e0, e1, e2, e3, e4; + + /* Input luma must be normilized to [0.0,1.0] */ + e0 = luma; + e1 = mat_norm(e0, luma_limits[0], luma_limits[2]); + e1 = mat_clamp(e1, 0.0, 1.0); + + e2 = (e1 - lum_min) * (lum_max - lum_min); + e3 = e2 + lum_min; + + /* Output luma must be denormilized back to [i_afLumaLim[0], i_afLumaLim[1]] */ + e4 = mat_denorm(e3, luma_limits[0], luma_limits[2]); + e4 = mat_clamp(e4, luma_limits[0], luma_limits[1]); + + return e4; +} + +int gm_map_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]) +{ + int zone 
= 0; + MATFLOAT itp_tm[3], itp_hr[3]; + + /* tone map */ + if ((ptr_gamut_map->lum_min > 0.0) || (ptr_gamut_map->lum_max < 1.0)) + gm_tm_itp(itp_inp, itp_tm, ptr_gamut_map->color_space_src.luma_limits, + ptr_gamut_map->lum_min, ptr_gamut_map->lum_max, + (ptr_gamut_map->gamut_map_mode == EGMM_TM) ? ptr_gamut_map->en_tm_scale_color : 0, + (ptr_gamut_map->mode & GM_SCALE_LUMA) ? 1 : 0); + else + mat_copy(itp_inp, itp_tm, 3); + + /* hue rotation */ + if (ptr_gamut_map->hue_rot_mode != EHRM_NONE) + gm_hr_itp(ptr_gamut_map, itp_tm, itp_hr, 0); + else + mat_copy(itp_tm, itp_hr, 3); + + /* color map */ + switch (ptr_gamut_map->gamut_map_mode) { + case EGMM_TM_CHCI: + zone = gm_map_chci_itp(ptr_gamut_map, itp_hr, itp_out); + break; + case EGMM_TM_CHSO: + zone = gm_map_chso_itp(ptr_gamut_map, itp_hr, itp_out); + break; + case EGMM_TM_CHTO: + zone = gm_map_chto_itp(ptr_gamut_map, itp_hr, itp_out); + break; + case EGMM_TM: + default: + mat_copy(itp_hr, itp_out, 3); + break; + } + + return zone; +} + +int gm_map_chto_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]) +{ + const MATFLOAT gm_2pi = 2.0 * mat_get_pi(); + int zone; + int pnt_map = -1; + int vec_hue_ind[2]; + MATFLOAT hue, hue_phs; + MATFLOAT origin2_ic[2], origin3_ic[2]; + + if (gm_is_valid_itp(&ptr_gamut_map->color_space_dst, itp_inp, ptr_gamut_map->mode & GM_PQTAB_3DLUT)) { + mat_copy(itp_inp, itp_out, 3); + return 0; + } + + hue = mat_angle(itp_inp[2], itp_inp[1]); + hue_phs = gm_hue_to_index_phase(hue, gm_2pi, ptr_gamut_map->num_hue_pnts, vec_hue_ind); + gm_interp_ic(vec_hue_ind, hue_phs, ptr_gamut_map->ptr_org2_ic, origin2_ic); + gm_interp_ic(vec_hue_ind, hue_phs, ptr_gamut_map->ptr_org3_ic, origin3_ic); + + zone = gm_get_zone(itp_inp, origin2_ic, origin3_ic, ptr_gamut_map->color_space_dst.luma_limits); + if ((ptr_gamut_map->mode & GM_ZONE1_FLEX) && (zone == 1)) { + /* correct origin2 for zone 1 to prevent noise bursting for dim content */ + MATFLOAT int0 = 
ptr_gamut_map->color_space_dst.luma_limits[0]; + MATFLOAT int1 = origin2_ic[0]; + MATFLOAT range_int = int1 - int0; + MATFLOAT thresh_int = (int1 + int0) / 2.0; + MATFLOAT phase; + + if (itp_inp[0] < thresh_int) { + phase = (itp_inp[0] - int0) / range_int; + origin2_ic[0] = itp_inp[0] + (int1 - itp_inp[0]) * phase; + } else { + phase = (int1 - itp_inp[0]) / range_int; + origin2_ic[0] = int1 + (itp_inp[0] - int1) * phase; + } + } + + switch (ptr_gamut_map->map_type) { + case EMT_SEG: + pnt_map = gm_map_seg_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, vec_hue_ind, hue_phs); + break; + case EMT_RAD: + pnt_map = gm_map_rad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue); + break; + case EMT_SEGRAD: + pnt_map = gm_map_segrad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue, vec_hue_ind, hue_phs); + break; + default: + mat_copy(itp_inp, itp_out, 3); + break; + } + + return zone; +} + +int gm_map_chso_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]) +{ + const MATFLOAT gm_2pi = 2.0 * mat_get_pi(); + int zone = 1; + int pnt_map = -1; + int vec_hue_ind[2]; + MATFLOAT hue, hue_phs; + MATFLOAT origin2_ic[2], origin3_ic[2]; + + if (gm_is_valid_itp(&ptr_gamut_map->color_space_dst, itp_inp, ptr_gamut_map->mode & GM_PQTAB_3DLUT)) { + mat_copy(itp_inp, itp_out, 3); + return 0; + } + + hue = mat_angle(itp_inp[2], itp_inp[1]); + hue_phs = gm_hue_to_index_phase(hue, gm_2pi, ptr_gamut_map->num_hue_pnts, vec_hue_ind); + gm_interp_ic(vec_hue_ind, hue_phs, ptr_gamut_map->ptr_org2_ic, origin2_ic); + origin2_ic[1] = 0.0; + origin3_ic[0] = itp_inp[0]; + origin3_ic[1] = mat_radius(itp_inp[2], itp_inp[1]); /* chroma */ + + switch (ptr_gamut_map->map_type) { + case EMT_SEG: + pnt_map = gm_map_seg_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, vec_hue_ind, hue_phs); + break; + case EMT_RAD: + pnt_map = gm_map_rad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, 
origin3_ic, hue); + break; + case EMT_SEGRAD: + pnt_map = gm_map_segrad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue, vec_hue_ind, hue_phs); + break; + default: + mat_copy(itp_inp, itp_out, 3); + break; + } + + return zone; +} + +int gm_map_chci_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]) +{ + const MATFLOAT gm_2pi = 2.0 * mat_get_pi(); + int zone = 1; + int pnt_map = -1; + MATFLOAT origin2_ic[2] = { itp_inp[0], 0.0 }; + MATFLOAT origin3_ic[2] = { itp_inp[0], 0.0 }; + int vec_hue_ind[2]; + MATFLOAT hue, hue_phs; + + if (gm_is_valid_itp(&ptr_gamut_map->color_space_dst, itp_inp, ptr_gamut_map->mode & GM_PQTAB_3DLUT)) { + mat_copy(itp_inp, itp_out, 3); + return 0; + } + + hue = mat_angle(itp_inp[2], itp_inp[1]); + hue_phs = gm_hue_to_index_phase(hue, gm_2pi, ptr_gamut_map->num_hue_pnts, vec_hue_ind); + switch (ptr_gamut_map->map_type) { + case EMT_SEG: + pnt_map = gm_map_seg_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, vec_hue_ind, hue_phs); + break; + case EMT_RAD: + pnt_map = gm_map_rad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue); + break; + case EMT_SEGRAD: + pnt_map = gm_map_segrad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue, vec_hue_ind, hue_phs); + break; + default: + mat_copy(itp_inp, itp_out, 3); + break; + } + + return zone; +} + +/* direction : 0 - src to dst (forward), 1 - dst to src (backward) */ +void gm_hr_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int direction) +{ + MATFLOAT ich_inp[3], ich_out[3]; + + cs_itp_to_ich(itp_inp, ich_inp); + gm_hr_ich(ptr_gamut_map, ich_inp, ich_out, direction); + cs_ich_to_itp(ich_out, itp_out); +} + +/* direction : 0 - src to dst (forward), 1 - dst to src (backward) */ +void gm_hr_ich(struct s_gamut_map *ptr_gamut_map, MATFLOAT ich_inp[3], MATFLOAT ich_out[3], int direction) +{ + MATFLOAT *ptr_hr_src_hc = direction ? 
ptr_gamut_map->ptr_hr_dst_hc : ptr_gamut_map->ptr_hr_src_hc; + MATFLOAT *ptr_hr_dst_hc = direction ? ptr_gamut_map->ptr_hr_src_hc : ptr_gamut_map->ptr_hr_dst_hc; + MATFLOAT rot_hs_cg[2]; + + /* get hue shift and chroma gain parameeters */ + gm_get_hr_parms(ich_inp, ptr_gamut_map->color_space_dst.luma_limits, ptr_hr_src_hc, ptr_hr_dst_hc, ptr_gamut_map->num_int_pnts, rot_hs_cg); + + ich_out[0] = ich_inp[0]; + ich_out[1] = (ptr_gamut_map->hue_rot_mode & GM_CHROMA_GAIN) ? ich_inp[1] * rot_hs_cg[1] : ich_inp[1]; + ich_out[2] = (ptr_gamut_map->hue_rot_mode & GM_HUE_SHIFT) ? mat_norm_angle(ich_inp[2] + rot_hs_cg[0]) : ich_inp[2]; +} + +void gm_get_hr_parms(MATFLOAT ich[3], MATFLOAT luma_limits[3], MATFLOAT *ptr_hr_src_hc, + MATFLOAT *ptr_hr_dst_hc, int num_int_pnts, MATFLOAT rot_hs_cg[2]) +{ + const MATFLOAT gm_2pi = 2.0 * mat_get_pi(); + MATFLOAT vec_hc_src[2][GM_NUM_PRIM], vec_hc_dst[2][GM_NUM_PRIM]; + MATFLOAT int_src, hue_src, hue_dst, chroma_src, chroma_dst; + int vec_int_ind[2]; + MATFLOAT int_phs; + int vec_hue_ind[2]; + MATFLOAT hue_phs; + int nk, ni; + + hue_src = ich[2]; + int_src = mat_norm(ich[0], luma_limits[0], luma_limits[2]); /* normilize to [0.0,1.0] */ + int_phs = mat_flt_to_index_phase(int_src, 1.0, num_int_pnts, vec_int_ind); + for (nk = 0; nk < GM_NUM_PRIM; nk++) { + int ind0 = (nk * num_int_pnts + vec_int_ind[0]) * 2; + int ind1 = (nk * num_int_pnts + vec_int_ind[1]) * 2; + for (ni = 0; ni < 2; ni++) { + vec_hc_src[ni][nk] = ptr_hr_src_hc[ind0 + ni] + (ptr_hr_src_hc[ind1 + ni] - ptr_hr_src_hc[ind0 + ni]) * int_phs; + vec_hc_dst[ni][nk] = ptr_hr_dst_hc[ind0 + ni] + (ptr_hr_dst_hc[ind1 + ni] - ptr_hr_dst_hc[ind0 + ni]) * int_phs; + } + } + + hue_phs = mat_hue_to_index_phase(hue_src, GM_NUM_PRIM, vec_hc_src[0], gm_2pi, 0, vec_hue_ind); + if (vec_hue_ind[1] == 0) + vec_hc_dst[0][vec_hue_ind[1]] += gm_2pi; /* correct hue for 2pi crossing */ + + /* calulate hue rotation */ + hue_dst = vec_hc_dst[0][vec_hue_ind[0]] + (vec_hc_dst[0][vec_hue_ind[1]] - 
vec_hc_dst[0][vec_hue_ind[0]]) * hue_phs; + hue_dst = mat_norm_angle(hue_dst); + rot_hs_cg[0] = hue_dst - hue_src; + + /* calculate chroma gain */ + chroma_src = vec_hc_src[1][vec_hue_ind[0]] + (vec_hc_src[1][vec_hue_ind[1]] - vec_hc_src[1][vec_hue_ind[0]]) * hue_phs; + chroma_dst = vec_hc_dst[1][vec_hue_ind[0]] + (vec_hc_dst[1][vec_hue_ind[1]] - vec_hc_dst[1][vec_hue_ind[0]]) * hue_phs; + rot_hs_cg[1] = (chroma_src > 0.0) ? MAT_MIN(chroma_dst / chroma_src, 1.0) : 1.0; +} + +int gm_map_seg_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], + int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int vec_hue_ind[2], MATFLOAT hue_phs) +{ + int pnt_map = -1; + + switch (zone) { + case 1: + pnt_map = gm_map_zone1_seg(itp_inp, itp_out, vec_hue_ind, hue_phs, origin2_ic, + ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, 0, ptr_gamut_map->num_edge_pnts - 1); + break; + case 2: + pnt_map = gm_map_zone2_seg(itp_inp, itp_out, vec_hue_ind, hue_phs, origin2_ic, + ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0); + break; + case 3: + pnt_map = gm_map_zone3_seg(itp_inp, itp_out, vec_hue_ind, hue_phs, origin3_ic, + ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0); + break; + default: + mat_copy(itp_inp, itp_out, 3); + break; + } + + return pnt_map; +} + +int gm_map_rad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], + int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT hue) +{ + switch (zone) { + case 1: + gm_map_zone1_rad(&ptr_gamut_map->color_space_dst, itp_inp, itp_out, + ptr_gamut_map->step_samp, origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + break; + case 2: + gm_map_zone2_rad(&ptr_gamut_map->color_space_dst, itp_inp, itp_out, + ptr_gamut_map->step_samp, origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + break; + case 3: + gm_map_zone3_rad(&ptr_gamut_map->color_space_dst, 
itp_inp, itp_out, + ptr_gamut_map->step_samp, origin3_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + break; + default: + mat_copy(itp_inp, itp_out, 3); + break; + } + + return 1; +} + +int gm_map_segrad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], + int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT hue, int vec_hue_ind[2], MATFLOAT hue_phs) +{ + int pnt_map = -1; + MATFLOAT seg_itp[3]; + + switch (zone) { + case 1: + pnt_map = gm_map_zone1_seg(itp_inp, seg_itp, vec_hue_ind, hue_phs, origin2_ic, + ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, 0, ptr_gamut_map->num_edge_pnts - 1); + gm_map_zone1_rad(&ptr_gamut_map->color_space_dst, seg_itp, itp_out, ptr_gamut_map->step_samp, + origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + break; + case 2: + pnt_map = gm_map_zone2_seg(itp_inp, seg_itp, vec_hue_ind, hue_phs, origin2_ic, + ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0); + gm_map_zone2_rad(&ptr_gamut_map->color_space_dst, seg_itp, itp_out, ptr_gamut_map->step_samp, + origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + break; + case 3: + pnt_map = gm_map_zone3_seg(itp_inp, seg_itp, vec_hue_ind, hue_phs, origin3_ic, + ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0); + gm_map_zone3_rad(&ptr_gamut_map->color_space_dst, seg_itp, itp_out, ptr_gamut_map->step_samp, + origin3_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT); + break; + default: + mat_copy(itp_inp, itp_out, 3); + break; + } + + return pnt_map; +} + +MATFLOAT gm_hue_to_index_phase(MATFLOAT hue, MATFLOAT hue_max, int num_hue_pnts, int vec_hue_ind[2]) +{ + MATFLOAT hue_step = hue_max / (MATFLOAT)num_hue_pnts; + MATFLOAT hue_max_ind = hue_step * (MATFLOAT)(num_hue_pnts - 1); + MATFLOAT tmp = (MATFLOAT)(num_hue_pnts - 1) / hue_max_ind; + + vec_hue_ind[0] = (int)(hue * tmp); + vec_hue_ind[1] = (vec_hue_ind[0] + 1) % num_hue_pnts; + + return 
(hue - (MATFLOAT)vec_hue_ind[0] / tmp) / hue_step; +} + +void gm_interp_ic(int vec_hue_ind[2], MATFLOAT hue_phs, MATFLOAT vec_pnt_ic[], MATFLOAT pnt_ic[2]) +{ + int off0 = vec_hue_ind[0] << 1; + int off1 = vec_hue_ind[1] << 1; + + pnt_ic[0] = vec_pnt_ic[off0 + 0] + (vec_pnt_ic[off1 + 0] - vec_pnt_ic[off0 + 0]) * hue_phs; + pnt_ic[1] = vec_pnt_ic[off0 + 1] + (vec_pnt_ic[off1 + 1] - vec_pnt_ic[off0 + 1]) * hue_phs; +} + +void gm_getseg_ic(int vec_hue_ind[2], MATFLOAT hue_phs, int ind_seg, int num_edge_pnts, + MATFLOAT *ptr_edge_ic, MATFLOAT pnt_ic[2]) +{ + int off0 = (vec_hue_ind[0] * num_edge_pnts + ind_seg) << 1; + int off1 = (vec_hue_ind[1] * num_edge_pnts + ind_seg) << 1; + MATFLOAT pnt0_ic[2], pnt1_ic[2]; + + pnt0_ic[0] = ptr_edge_ic[off0 + 0]; + pnt0_ic[1] = ptr_edge_ic[off0 + 1]; + pnt1_ic[0] = ptr_edge_ic[off1 + 0]; + pnt1_ic[1] = ptr_edge_ic[off1 + 1]; + + pnt_ic[0] = pnt0_ic[0] + (pnt1_ic[0] - pnt0_ic[0]) * hue_phs; + pnt_ic[1] = pnt0_ic[1] + (pnt1_ic[1] - pnt0_ic[1]) * hue_phs; +} + +void gm_genedge(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], int num_edge_pnts, + enum gm_edge_type edge_type, MATFLOAT step_samp, MATFLOAT hue, MATFLOAT *ptr_edge_ic, int en_pq_lut) +{ + if (edge_type == EET_CHROMA) /* chroma for constant intensity */ + gm_genedge_int(ptr_color_space, luma_limits, num_edge_pnts, hue, step_samp, ptr_edge_ic, en_pq_lut); + else /* intensity and chroma for constant elevaltion angle */ + gm_genedge_rad(ptr_color_space, luma_limits, num_edge_pnts, hue, step_samp, ptr_edge_ic, en_pq_lut); +} + +void gm_genedge_int(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], int num_edge_pnts, + MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic, int en_pq_lut) +{ + MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) }; + MATFLOAT step_int = luma_limits[2] / (MATFLOAT)(num_edge_pnts - 1); + MATFLOAT pnt_ic[2] = { luma_limits[0], 0.0 }; + MATFLOAT inc_ic[2] = { 0.0, step_samp }; + MATFLOAT vec_chroma_prev[2] = { 
pnt_ic[1], pnt_ic[1] }; + int np; + + ptr_edge_ic[0] = pnt_ic[0]; + ptr_edge_ic[1] = pnt_ic[1]; + for (np = 1; np < num_edge_pnts - 1; np++) { + pnt_ic[0] += step_int; + pnt_ic[1] = 2.0 * vec_chroma_prev[1] - vec_chroma_prev[0]; /* linear predictor */ + pnt_ic[1] = MAT_MAX(pnt_ic[1], 0.0); + gm_sample_edge_ic(ptr_color_space, hue_sin_cos, inc_ic, pnt_ic, en_pq_lut); + vec_chroma_prev[0] = vec_chroma_prev[1]; + vec_chroma_prev[1] = pnt_ic[1]; + ptr_edge_ic[np * 2 + 0] = pnt_ic[0]; + ptr_edge_ic[np * 2 + 1] = pnt_ic[1]; + } + ptr_edge_ic[(num_edge_pnts - 1) * 2 + 0] = luma_limits[1]; + ptr_edge_ic[(num_edge_pnts - 1) * 2 + 1] = 0.0; +} + +void gm_genedge_rad(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], int num_edge_pnts, + MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic, int en_pq_lut) +{ + const MATFLOAT gm_pi = mat_get_pi(); + MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) }; + MATFLOAT step_angle = gm_pi / (MATFLOAT)(num_edge_pnts - 1); + MATFLOAT vec_org[2] = { mat_denorm(GM_EDGE_ORG, ptr_color_space->luma_limits[0], ptr_color_space->luma_limits[2]), 0.0 }; + MATFLOAT angle = step_angle; + MATFLOAT radius = vec_org[0] - luma_limits[0]; + MATFLOAT vec_radius_prev[2] = { radius, radius }; + int np; + + ptr_edge_ic[0] = luma_limits[0]; + ptr_edge_ic[1] = 0.0; + for (np = 1; np < num_edge_pnts - 1; np++) { + MATFLOAT ang_sin_cos[2] = { mat_sin(angle), mat_cos(angle) }; + MATFLOAT inc_ic[2] = {-step_samp * ang_sin_cos[1], step_samp * ang_sin_cos[0] }; + MATFLOAT pnt_ic[2]; + + if (np > 1) + radius = 2.0 * vec_radius_prev[1] - vec_radius_prev[0]; /* linear predictor */ + pnt_ic[0] = vec_org[0] - radius * ang_sin_cos[1]; + pnt_ic[1] = radius * ang_sin_cos[0]; + gm_sample_edge_ic(ptr_color_space, hue_sin_cos, inc_ic, pnt_ic, en_pq_lut); + vec_radius_prev[0] = vec_radius_prev[1]; + vec_radius_prev[1] = mat_radius(vec_org[0] - pnt_ic[0], pnt_ic[1]); + ptr_edge_ic[np * 2 + 0] = pnt_ic[0]; + ptr_edge_ic[np * 2 + 1] = pnt_ic[1]; + angle += 
step_angle; + } + ptr_edge_ic[(num_edge_pnts - 1) * 2 + 0] = luma_limits[1]; + ptr_edge_ic[(num_edge_pnts - 1) * 2 + 1] = 0.0; +} + +void gm_edgecusp_adjust(MATFLOAT *ptr_edge_ic, int num_edge_pnts, MATFLOAT cusp_ic[2]) +{ + int ind0, ind1; + MATFLOAT delta0, delta1; + + for (ind1 = 2 * (num_edge_pnts >> 2); ind1 < 2 * num_edge_pnts; ind1 += 2) { + if (ptr_edge_ic[ind1] >= cusp_ic[0]) { + ind0 = ind1 - 2; + delta1 = ptr_edge_ic[ind1] - cusp_ic[0]; + delta0 = cusp_ic[0] - ptr_edge_ic[ind0]; + if (delta0 < delta1) { + ptr_edge_ic[ind0] = cusp_ic[0]; + ptr_edge_ic[ind0 + 1] = cusp_ic[1]; + } else { + ptr_edge_ic[ind1] = cusp_ic[0]; + ptr_edge_ic[ind1 + 1] = cusp_ic[1]; + } + break; + } + } +} + +void gm_sample_edge_ic(struct s_color_space *ptr_color_space, MATFLOAT hue_sin_cos[2], + MATFLOAT inc_ic[2], MATFLOAT pnt_ic[2], int en_pq_lut) +{ + if (gm_is_valid_ic(ptr_color_space, pnt_ic, hue_sin_cos, en_pq_lut)) { + do { + pnt_ic[0] += inc_ic[0]; + pnt_ic[1] += inc_ic[1]; + } while (gm_is_valid_ic(ptr_color_space, pnt_ic, hue_sin_cos, en_pq_lut)); + pnt_ic[0] -= inc_ic[0]; + pnt_ic[1] -= inc_ic[1]; + } else { + do { + pnt_ic[0] -= inc_ic[0]; + pnt_ic[1] -= inc_ic[1]; + pnt_ic[1] = MAT_MAX(pnt_ic[1], 0.0); /* for zone 3 */ + } while (!gm_is_valid_ic(ptr_color_space, pnt_ic, hue_sin_cos, en_pq_lut) && (pnt_ic[1] > 0.0)); + } +} + + +int gm_get_zone(MATFLOAT itp[3], MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT luma_limits[3]) +{ + MATFLOAT chroma = mat_radius(itp[2], itp[1]); + MATFLOAT slope, offset; + + if (itp[0] < origin2_ic[0]) + return 1; + + slope = (origin3_ic[0] - origin2_ic[0]) / (origin3_ic[1] - origin2_ic[1]); + offset = origin2_ic[0] - slope * origin2_ic[1]; + + if (itp[0] < slope * chroma + offset) + return 2; + + return 3; +} + +int gm_map_zone1_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], MATFLOAT hue_phs, + MATFLOAT origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst) +{ + int pnt_inc = (pnt_fst 
< pnt_lst) ? 1 : -1; + MATFLOAT pnt0_ich[3], pnt1_ich[3]; + MATFLOAT pnt_ich[3]; + MATFLOAT vec_seg_ic[2][2]; + MATFLOAT s_ic[2]; + int np; + + cs_itp_to_ich(itp_inp, pnt0_ich); + pnt1_ich[0] = origin2_ic[0]; + pnt1_ich[1] = 0.0; + pnt1_ich[2] = pnt0_ich[2]; + s_ic[0] = pnt1_ich[0] - pnt0_ich[0]; + s_ic[1] = pnt1_ich[1] - pnt0_ich[1]; + + gm_getseg_ic(vec_hue_ind, hue_phs, pnt_fst, num_edge_pnts, ptr_edge_ic, vec_seg_ic[0]); + + for (np = pnt_fst + pnt_inc; (pnt_inc > 0) ? np <= pnt_lst : np >= pnt_lst; np += pnt_inc) { + gm_getseg_ic(vec_hue_ind, hue_phs, np, num_edge_pnts, ptr_edge_ic, vec_seg_ic[1]); + if (gm_seg_intersection(pnt0_ich, pnt1_ich, s_ic, vec_seg_ic[0], vec_seg_ic[1], pnt_ich)) { + pnt_ich[2] = pnt0_ich[2]; + cs_ich_to_itp(pnt_ich, itp_out); + return np; + } + mat_copy(vec_seg_ic[1], vec_seg_ic[0], 2); + } + + mat_copy(itp_inp, itp_out, 3); /* Should not happen */ + + return -1; +} + +int gm_map_zone2_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], MATFLOAT hue_phs, + MATFLOAT origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst) +{ + int pnt_inc = (pnt_fst < pnt_lst) ? 1 : -1; + MATFLOAT pnt0_ich[3], pnt1_ich[3]; + MATFLOAT pnt_ich[3]; + MATFLOAT vec_seg_ic[2][2]; + MATFLOAT s_ic[2]; + int np; + + cs_itp_to_ich(itp_inp, pnt0_ich); + pnt1_ich[0] = origin2_ic[0]; + pnt1_ich[1] = origin2_ic[1]; + pnt1_ich[2] = pnt0_ich[2]; + s_ic[0] = pnt1_ich[0] - pnt0_ich[0]; + s_ic[1] = pnt1_ich[1] - pnt0_ich[1]; + + gm_getseg_ic(vec_hue_ind, hue_phs, pnt_fst, num_edge_pnts, ptr_edge_ic, vec_seg_ic[0]); + + for (np = pnt_fst + pnt_inc; (pnt_inc > 0) ? 
np <= pnt_lst : np >= pnt_lst; np += pnt_inc) { + gm_getseg_ic(vec_hue_ind, hue_phs, np, num_edge_pnts, ptr_edge_ic, vec_seg_ic[1]); + if (gm_seg_intersection(pnt0_ich, pnt1_ich, s_ic, vec_seg_ic[0], vec_seg_ic[1], pnt_ich)) { + pnt_ich[2] = pnt0_ich[2]; + cs_ich_to_itp(pnt_ich, itp_out); + return np; + } + mat_copy(vec_seg_ic[1], vec_seg_ic[0], 2); + } + + mat_copy(itp_inp, itp_out, 3); /* Should not happen */ + + return -1; +} + +int gm_map_zone3_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], MATFLOAT hue_phs, + MATFLOAT origin3_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst) +{ + int pnt_inc = (pnt_fst < pnt_lst) ? 1 : -1; + MATFLOAT pnt0_ich[3], pnt1_ich[3]; + MATFLOAT pnt_ich[3]; + MATFLOAT s_ic[2]; + MATFLOAT vec_seg_ic[2][2]; + MATFLOAT slope, offset; + int np; + + cs_itp_to_ich(itp_inp, pnt0_ich); + slope = (origin3_ic[0] - pnt0_ich[0]) / (origin3_ic[1] - pnt0_ich[1]); + offset = pnt0_ich[0] - slope * pnt0_ich[1]; + pnt0_ich[0] = offset; + pnt0_ich[1] = 0.0; + + pnt1_ich[0] = origin3_ic[0]; + pnt1_ich[1] = origin3_ic[1]; + pnt1_ich[2] = pnt0_ich[2]; + s_ic[0] = pnt1_ich[0] - pnt0_ich[0]; + s_ic[1] = pnt1_ich[1] - pnt0_ich[1]; + + gm_getseg_ic(vec_hue_ind, hue_phs, num_edge_pnts - 1, num_edge_pnts, ptr_edge_ic, vec_seg_ic[0]); + + /* prevent non-intersection for the last segment */ + if (pnt0_ich[0] >= vec_seg_ic[0][0]) { + itp_out[0] = vec_seg_ic[0][0]; + itp_out[1] = 0.0; + itp_out[2] = 0.0; + return num_edge_pnts - 1; + } + + if (pnt_fst != num_edge_pnts - 1) + gm_getseg_ic(vec_hue_ind, hue_phs, pnt_fst, num_edge_pnts, ptr_edge_ic, vec_seg_ic[0]); + + for (np = pnt_fst + pnt_inc; (pnt_inc > 0) ? 
np <= pnt_lst : np >= pnt_lst; np += pnt_inc) { + gm_getseg_ic(vec_hue_ind, hue_phs, np, num_edge_pnts, ptr_edge_ic, vec_seg_ic[1]); + if (gm_seg_intersection(pnt0_ich, pnt1_ich, s_ic, vec_seg_ic[0], vec_seg_ic[1], pnt_ich)) { + pnt_ich[2] = pnt0_ich[2]; + cs_ich_to_itp(pnt_ich, itp_out); + return np; + } + mat_copy(vec_seg_ic[1], vec_seg_ic[0], 2); + } + + mat_copy(itp_inp, itp_out, 3); /* Should not happen */ + + return -1; +} + +void gm_map_zone1_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], + MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut) +{ + MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) }; + MATFLOAT chroma = mat_radius(itp_inp[2], itp_inp[1]); + MATFLOAT int_tmp = origin2_ic[0] - itp_inp[0]; + MATFLOAT angle = mat_angle(chroma, int_tmp); + MATFLOAT pnt_ic[2] = { itp_inp[0], chroma }; + MATFLOAT inc_ic[2] = { -step_samp * mat_cos(angle), step_samp * mat_sin(angle) }; + + gm_sample_edge_ic(ptr_color_space, hue_sin_cos, inc_ic, pnt_ic, en_pq_lut); + + itp_out[0] = pnt_ic[0]; + itp_out[1] = pnt_ic[1] * hue_sin_cos[1]; + itp_out[2] = pnt_ic[1] * hue_sin_cos[0]; +} + +void gm_map_zone2_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], + MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut) +{ + MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) }; + MATFLOAT chroma = mat_radius(itp_inp[2], itp_inp[1]); + MATFLOAT int_tmp = itp_inp[0] - origin2_ic[0]; + MATFLOAT angle = mat_angle(int_tmp, chroma - origin2_ic[1]); + MATFLOAT pnt_ic[2] = { itp_inp[0], chroma }; + MATFLOAT inc_ic[2] = { step_samp * mat_sin(angle), step_samp * mat_cos(angle) }; + + gm_sample_edge_ic(ptr_color_space, hue_sin_cos, inc_ic, pnt_ic, en_pq_lut); + + itp_out[0] = pnt_ic[0]; + itp_out[1] = pnt_ic[1] * hue_sin_cos[1]; + itp_out[2] = pnt_ic[1] * hue_sin_cos[0]; +} + +void gm_map_zone3_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT 
itp_out[3], + MATFLOAT step_samp, MATFLOAT origin3_ic[2], MATFLOAT hue, int en_pq_lut) +{ + MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) }; + MATFLOAT chroma = mat_radius(itp_inp[2], itp_inp[1]); + MATFLOAT int_tmp = origin3_ic[0] - itp_inp[0]; + MATFLOAT angle = mat_angle(int_tmp, origin3_ic[1] - chroma); + MATFLOAT pnt_ic[2] = { itp_inp[0], chroma }; + MATFLOAT inc_ic[2] = { step_samp * mat_sin(angle), step_samp * mat_cos(angle) }; + + gm_sample_edge_ic(ptr_color_space, hue_sin_cos, inc_ic, pnt_ic, en_pq_lut); + + itp_out[0] = pnt_ic[0]; + itp_out[1] = pnt_ic[1] * hue_sin_cos[1]; + itp_out[2] = pnt_ic[1] * hue_sin_cos[0]; +} + +void gm_show_pix(int zone, MATFLOAT itp_src[3], MATFLOAT itp_dst[3], MATFLOAT rgb[3], + enum gm_show_pix_mode show_pix_mode, MATFLOAT hue_limits[2]) +{ /* debug aid: calls mat_set(0.5, rgb, 3) when the pixel fails the test of show_pix_mode; ESPM_NUMZ writes zone / 3.0 instead; any other mode leaves rgb unchanged */ + MATFLOAT hue = mat_angle(itp_src[2], itp_src[1]); + + switch (show_pix_mode) { + case ESPM_NOMAP: + if (zone != 0) + mat_set(0.5, rgb, 3); + break; + case ESPM_MAP: + if (zone == 0) + mat_set(0.5, rgb, 3); + break; + case ESPM_MAPZ1: + if (zone != 1) + mat_set(0.5, rgb, 3); + break; + case ESPM_MAPZ2: + if (zone != 2) + mat_set(0.5, rgb, 3); + break; + case ESPM_MAPZ3: + if (zone != 3) + mat_set(0.5, rgb, 3); + break; + case ESPM_NUMZ: + mat_set((MATFLOAT)zone / 3.0, rgb, 3); + break; + case ESPM_HUEINP: + if ((hue < hue_limits[0]) || (hue > hue_limits[1])) + mat_set(0.5, rgb, 3); + break; + case ESPM_HUEOUT: /* NOTE(review): tests the same itp_src hue as ESPM_HUEINP; itp_dst is never read - confirm intended */ + if ((hue < hue_limits[0]) || (hue > hue_limits[1])) + mat_set(0.5, rgb, 3); + break; + default: + break; + } +} + +void gm_gen_3dlut(struct s_gamut_map* ptr_gamut_map, int num_pnts, int bitwidth, + int en_merge, unsigned short* ptr_3dlut_rgb) +{ /* fill ptr_3dlut_rgb with num_pnts^3 RGB triplets (red index outermost, blue innermost) mapped through gm_rgb_to_rgb; with en_merge the input codes are read from the LUT itself instead of a uniform grid */ + int val_max = (1 << bitwidth) - 1; + int index = 0; + int nir, nig, nib; + unsigned short rgb[3]; + MATFLOAT rgb_src[3], rgb_dst[3]; + MATFLOAT rgb_src_lin[3], rgb_dst_lin[3]; + + /* the uniform grid step divides by (num_pnts - 1): a single-point grid would be an integer division by zero */ + if (!en_merge && (num_pnts < 2)) + return; + + #ifdef GM_SIM + #pragma omp parallel for private(index, nig, nib, rgb, rgb_src, rgb_dst, rgb_src_lin, rgb_dst_lin) + #endif + for (nir = 0; nir < 
num_pnts; nir++) { + index = num_pnts * num_pnts * nir * 3; + rgb[0] = en_merge ? ptr_3dlut_rgb[index + 0] : (nir * val_max) / (num_pnts - 1); + rgb_src[0] = mat_int2flt(rgb[0], val_max); + rgb_src_lin[0] = cs_nlin_to_lin(&ptr_gamut_map->color_space_src, rgb_src[0]); + for (nig = 0; nig < num_pnts; nig++) { + rgb[1] = en_merge ? ptr_3dlut_rgb[index + 1] : (nig * val_max) / (num_pnts - 1); + rgb_src[1] = mat_int2flt(rgb[1], val_max); + rgb_src_lin[1] = cs_nlin_to_lin(&ptr_gamut_map->color_space_src, rgb_src[1]); + for (nib = 0; nib < num_pnts; nib++) { + rgb[2] = en_merge ? ptr_3dlut_rgb[index + 2] : (nib * val_max) / (num_pnts - 1); + rgb_src[2] = mat_int2flt(rgb[2], val_max); + rgb_src_lin[2] = cs_nlin_to_lin(&ptr_gamut_map->color_space_src, rgb_src[2]); + + gm_rgb_to_rgb(ptr_gamut_map, rgb_src_lin, rgb_dst_lin); + cs_lin_to_nlin_rgb(&ptr_gamut_map->color_space_dst, rgb_dst_lin, rgb_dst); + cs_flt2short_rgb(rgb_dst, &ptr_3dlut_rgb[index], val_max); + index += 3; + + } + } + } +} + +void gm_gen_map(struct s_gamut_map* ptr_gamut_map, int update_msk) +{ /* regenerate the per-hue tables selected by update_msk: Origin2/Origin3 points (EGMM_TM_CHTO only) and the gamut edge via gm_gen_edge_hue (modes above EGMM_TM unless map_type is EMT_RAD) */ + if (ptr_gamut_map->gamut_map_mode == EGMM_TM_CHTO) + if (update_msk & (GM_UPDATE_SRC | GM_UPDATE_DST)) { + MATFLOAT* ptr_org13_factor = (MATFLOAT*)ptr_gamut_map->ptr_func_alloc(ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), + ptr_gamut_map->memory_context); + int nh; + + /* the scratch buffer is written by gm_genorg13_factor: do not hand it a failed allocation */ + if (!ptr_org13_factor) + return; + + gm_genorg13_factor(ptr_gamut_map, ptr_org13_factor); + #ifdef GM_SIM + #pragma omp parallel for num_threads(10) + #endif + for (nh = 0; nh < ptr_gamut_map->num_hue_pnts; nh++) { + /* generate origin 2 and 3 points per hue slice */ + gm_genorigin23_hue(ptr_gamut_map, ptr_org13_factor, nh); + } + + ptr_gamut_map->ptr_func_free(ptr_org13_factor, ptr_gamut_map->memory_context); + } + + if ((ptr_gamut_map->gamut_map_mode > EGMM_TM) && (ptr_gamut_map->map_type != EMT_RAD)) + if (update_msk & GM_UPDATE_DST) { + int nh; + + #ifdef GM_SIM + #pragma omp parallel for num_threads(10) + #endif + for (nh = 0; nh < ptr_gamut_map->num_hue_pnts; nh++){ + /* 
generate GBD per hue slice */ + gm_gen_edge_hue(ptr_gamut_map, nh); + } + } +} + +void gm_rgb_to_itp(struct s_color_space* ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3], int en_pq_lut) +{ /* output may be the same as input */ + MATFLOAT lms[3]; + int nc; + + mat_eval_3x3(ptr_color_space->mat_rgb2lms, rgb_inp, lms); + for (nc = 0; nc < 3; nc++) + lms[nc] = en_pq_lut ? gm_pq_lut(lms[nc], EGD_LIN_2_NONLIN) : + cs_gamma_pq(lms[nc], EGD_LIN_2_NONLIN); + mat_eval_3x3(ptr_color_space->mat_lms2itp, lms, itp_out); +} + +void gm_itp_to_rgb(struct s_color_space* ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3], int en_pq_lut) +{ /* output may be the same as input */ + MATFLOAT lms[3]; + int nc; + + mat_eval_3x3(ptr_color_space->mat_itp2lms, itp_inp, lms); + for (nc = 0; nc < 3; nc++) + lms[nc] = en_pq_lut ? gm_pq_lut(lms[nc], EGD_NONLIN_2_LIN) : + cs_gamma_pq(lms[nc], EGD_NONLIN_2_LIN); + mat_eval_3x3(ptr_color_space->mat_lms2rgb, lms, rgb_out); +} + +int gm_is_valid_itp(struct s_color_space* ptr_color_space, MATFLOAT itp[3], int en_pq_lut) +{ + MATFLOAT rgb[3]; + + gm_itp_to_rgb(ptr_color_space, itp, rgb, en_pq_lut); + + return cs_is_valid_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[1]); +} + +int gm_is_valid_ic(struct s_color_space* ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2], int en_pq_lut) +{ + MATFLOAT pnt_itp[3]; + + pnt_itp[0] = pnt_ic[0]; + pnt_itp[1] = pnt_ic[1] * hue_sin_cos[1]; + pnt_itp[2] = pnt_ic[1] * hue_sin_cos[0]; + + return gm_is_valid_itp(ptr_color_space, pnt_itp, en_pq_lut); +} + +void gm_gen_pq_lut(float* ptr_lut, int num_pnts, enum cs_gamma_dir gamma_dir) +{ + int ni; + + if (gamma_dir == EGD_LIN_2_NONLIN) { + MATFLOAT increment = mat_pow(2.0, -32.0) / 128.0; /* also == pow(2,-39) or pow(2,-32)/128 */ + MATFLOAT value = 0.0; + + for (ni = 0; ni < num_pnts; ni++) { + ptr_lut[ni] = (float)cs_gamma_pq(value, gamma_dir); + /* every 128 pts, region changes and delta between pts doubles 
*/ + if ((ni > 0) && (ni % 128 == 0)) + increment *= 2.0; + value += increment; + } + + } + else + for (ni = 0; ni < num_pnts; ni++) + ptr_lut[ni] = (float)cs_gamma_pq((MATFLOAT)ni / (MATFLOAT)(num_pnts - 1), gamma_dir); +} + +MATFLOAT gm_pq_lut(MATFLOAT val, enum cs_gamma_dir gamma_dir) +{ + static const MATFLOAT gm_inc = 1.0 / (MATFLOAT)((long long)1 << 32); + MATFLOAT sign = (val < 0.0) ? -1.0 : 1.0; + MATFLOAT val_abs = MAT_ABS(val); + MATFLOAT val_out, vec_inp[2], phs; + int vec_ind[2]; + + if (gamma_dir == EGD_LIN_2_NONLIN) + if (val_abs >= gm_inc) { + int exp; + MATFLOAT mantissa = mat_frexp(val_abs, &exp); + MATFLOAT tmp = (mantissa - 0.5) * 256.0; + + vec_ind[0] = (int)tmp; + phs = tmp - (MATFLOAT)vec_ind[0]; + vec_ind[0] += (exp + 31) << 7; + vec_ind[1] = vec_ind[0] + 1; + if (vec_ind[1] > GM_PQTAB_NUMPNTS - 1) + vec_ind[1] = GM_PQTAB_NUMPNTS - 1; + vec_inp[0] = gm_lin2pq[vec_ind[0]]; + vec_inp[1] = gm_lin2pq[vec_ind[1]]; + val_out = mat_linear(vec_inp, phs); + } + else + val_out = gm_lin2pq[0]; + else { + MATFLOAT tmp = val_abs * (MATFLOAT)(GM_PQTAB_NUMPNTS - 1); + vec_ind[0] = (int)tmp; + phs = tmp - (MATFLOAT)vec_ind[0]; + vec_ind[1] = vec_ind[0] + 1; + if (vec_ind[1] > GM_PQTAB_NUMPNTS - 1) + vec_ind[1] = GM_PQTAB_NUMPNTS - 1; + vec_inp[0] = gm_pq2lin[vec_ind[0]]; + vec_inp[1] = gm_pq2lin[vec_ind[1]]; + val_out = mat_linear(vec_inp, phs); + } + + return val_out * sign; +} + +int gm_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2], MATFLOAT s1_xy[2], + MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2]) +{ + MATFLOAT s2_x = p3_xy[0] - p2_xy[0]; + MATFLOAT s2_y = p3_xy[1] - p2_xy[1]; + MATFLOAT denom = -s2_x * s1_xy[1] + s1_xy[0] * s2_y; + MATFLOAT s0_x, s0_y, s, t; + + if (denom == 0.0) + return 0; /* no collision */ + + s0_x = p0_xy[0] - p2_xy[0]; + s0_y = p0_xy[1] - p2_xy[1]; + + s = (-s1_xy[1] * s0_x + s1_xy[0] * s0_y) / denom; + if ((s < 0.0) || (s > 1.0)) + return 0; /* no collision */ + + t = (s2_x * s0_y - s2_y * s0_x) / denom; + if ((t 
< 0.0) || (t > 1.0)) + return 0; /* no collision */ + + /* collision detected */ + p_xy[0] = p0_xy[0] + (t * s1_xy[0]); + p_xy[1] = p0_xy[1] + (t * s1_xy[1]); + + return 1; +} \ No newline at end of file diff --git a/src/amd/gmlib/gm/gm_funcs.h b/src/amd/gmlib/gm/gm_funcs.h new file mode 100755 index 00000000000..52ca4db1de6 --- /dev/null +++ b/src/amd/gmlib/gm/gm_funcs.h @@ -0,0 +1,299 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : gm_funcs.h + * Purpose : Gamut Mapping functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : November 11, 2024 + * Version : 3.1 + *---------------------------------------------------------------------- + * + */ + +#pragma once + +#include "mat_funcs.h" +#include "cs_funcs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define GM_NUM_PRIM 6 /* number of primary/secondary colors */ +#define GM_NUM_HUE 360 /* default number of hue slices in edge description grid */ +#define GM_NUM_EDGE 181 /* default number of edge points per hue in edge description grid */ +#define GM_NUM_INT 33 /* default number of intensity levels in HueRot grid */ +#define GM_STEP_SAMP 0.0001 /* default accuracy of edge detection procedures (for 14 bits signal) */ +#define GM_EDGE_ORG 0.5 /* default center point for edge description procedure */ +#define GM_ORG1_FACTOR 0.5 /* Origin1 default intensity */ +#define GM_ORG3_FACTOR 1.0 /* Origin3 default intensity */ +#define GM_ORG2_PERC 0.9 + +#define GM_CUSP_ADJUST 0x01 /* Adjust cusp points */ +#define GM_ZONE1_FLEX 0x02 /* Flexible zone 1 */ +#define GM_PQTAB_3DLUT 0x04 +#define GM_PQTAB_GBD 0x08 +#define GM_SCALE_LUMA 0x04 /* Luma scaling; NOTE(review): same value as GM_PQTAB_3DLUT - confirm the two bits are not meant to be distinct */ + +#define GM_UPDATE_SRC 0x01 +#define GM_UPDATE_DST 0x02 + +#define GM_HUE_SHIFT 0x01 +#define GM_CHROMA_GAIN 0x02 + +#define GM_PQTAB_NUMPNTS 4097 + +enum gm_gamut_map_mode { + EGMM_NONE = 0, /* NONE */ + EGMM_TM = 1, /* Tone 
Map (BT2390-4) */ + EGMM_TM_CHTO = 2, /* Tone Map + CHTO (Constant Hue Triple Origin) */ + EGMM_TM_CHSO = 3, /* Tone Map + CHSO (Constant Hue Single Origin) */ + EGMM_TM_CHCI = 4 /* Tone Map + CHCI (Constant Hue Constant Intensity) */ +}; + +enum gm_hue_rot_mode { + EHRM_NONE = 0, /* NONE */ + EHRM_HR = 1, /* Hue rotation */ + EHRM_CC = 2, /* Chroma compression */ + EHRM_HR_CC = 3 /* Hue rotation + Chroma compression */ +}; + +enum gm_map_type { + EMT_SEG = 0, /* intensity segment */ + EMT_RAD = 1, /* arc segment */ + EMT_SEGRAD = 2 /* hybrid */ +}; + +enum gm_edge_type { + EET_RAD = 0, /* elevation angle uniform */ + EET_CHROMA = 1 /* intensity uniform */ +}; + +enum gm_show_pix_mode { + ESPM_NONE = 0, /* NONE */ + ESPM_NOMAP = 1, /* Show pixels inside gamut */ + ESPM_MAP = 2, /* Show pixels outside gamut */ + ESPM_MAPZ1 = 3, /* Show pixels outside gamut in zone1 */ + ESPM_MAPZ2 = 4, /* Show pixels outside gamut in zone2 */ + ESPM_MAPZ3 = 5, /* Show pixels outside gamut in zone3 */ + ESPM_NUMZ = 6, /* Show pixels zone number */ + ESPM_HUEINP = 7, /* Show input pixels with hue in range */ + ESPM_HUEOUT = 8 /* Show output pixels with hue in range */ +}; + +struct s_gamut_map { + /* input parameters */ + enum gm_gamut_map_mode gamut_map_mode; + /* Gamut Map Mode: 0 - no gamut map, 1 - Tone Map BT2390-4, 2 - TM+CHTO, 3 - TM+CHSO, 4 - TM+CHCI */ + enum gm_hue_rot_mode hue_rot_mode; + /* Hue Rotation Mode: 0 - none, 1 - hue rotation, 2 - chroma compression, 3 - hue rotation and chroma compression */ + int en_tm_scale_color; + /* Enable/Disable Color Scaling in Tone Mapping mode only: {0,1} = 1 */ + unsigned int mode; + /* Reserved for modifications of the Gamut Map algo */ + struct s_color_space color_space_src; + /* Source color space (primary RGBW chromaticity, gamma, and Luminance min/max) */ + struct s_color_space color_space_dst; + /* Destination color space (primary RGBW chromaticity, gamma and Luminance min/max) */ + /* CHTO input tuning parameters */ + MATFLOAT 
org2_perc_c; + /* Origin2 percentage gap for chroma [0.0,1.0] = 0.9 */ + MATFLOAT vec_org1_factor[GM_NUM_PRIM]; + /* Factor of Origin1 for M,R,Y,G,C,B [0.0,2.0] = 1.3, 1.3, 1.3, 1.3, 1.2, 1.0 */ + MATFLOAT vec_org3_factor[GM_NUM_PRIM]; + /* Factor of Origin3 for M,R,Y,G,C,B [1.0,1.5] = 1.05, 1.2, 1.05, 1.05, 1.01, 1.05 (NOTE(review): gm_vec_org13_factor_def in this header lists 1.05, 1.10, 1.10, 1.05, 1.01, 1.06 - confirm which is current) */ + /* GM input tuning parameters */ + int num_hue_pnts; + /* Number of hue grid points: [90,360]=360 */ + int num_edge_pnts; + /* Number of edge IC grid points: [91, 181] = 181 */ + int num_int_pnts; + /* Number of intensity grid points for primary hues: [5,33] = 33 */ + enum gm_edge_type edge_type;/* Edge type: {0,1} = 0 : 0 - radius based EET_RAD, 1 - chroma based EET_CHROMA */ + enum gm_map_type map_type; + /* Map type: {0,1,2} = 0 : 0 - segments intersection SEG, 1 - radius sampling RAD, 2 hybrid - SEG+RAD */ + MATFLOAT step_samp; + /* Sampling precision in IC space for edge search [0.00001,0.001]=0.0001 */ + int reserve; + /* Reserved for debugging purpose */ + enum gm_show_pix_mode show_pix_mode; + /* Show Pix Mode: [0,8]=0 : show pixel debugging mode */ + MATFLOAT show_pix_hue_limits[2]; /* Show Pixel mode hue ranges */ + /* calculated variables */ + MATFLOAT lum_min; + /* minLum (BT2390-4) in PQ non-linear space */ + MATFLOAT lum_max; + /* maxLum (BT2390-4) in PQ non-linear space */ + MATFLOAT vec_prim_src_ich[3 * GM_NUM_PRIM]; + /* ich for M,R,Y,G,C,B primaries of source gamut */ + MATFLOAT vec_prim_dst_ich[3 * GM_NUM_PRIM]; + /* ich for M,R,Y,G,C,B primaries of target gamut */ + MATFLOAT *ptr_cusp_src_ic; + /* Intensity and chroma of Cusp num_hue_pnts points for source gamut */ + MATFLOAT *ptr_cusp_dst_ic; + /* Intensity and chroma of Cusp num_hue_pnts points for target gamut */ + MATFLOAT *ptr_org2_ic; + /* Intensity and chroma of Origin2 for num_hue_pnts points */ + MATFLOAT *ptr_org3_ic; + /* Intensity and chroma of Origin3 for num_hue_pnts points */ + MATFLOAT *ptr_hr_src_hc; + /* Source Primary Hue and Chroma for (GM_NUM_PRIM * 
num_int_pnts) points */ + MATFLOAT *ptr_hr_dst_hc; + /* Target Primary Hue and Chroma for (GM_NUM_PRIM * num_int_pnts) points */ + MATFLOAT *ptr_edge_ic; + /* Target gamut edge for (num_hue_pnts * num_edge_pnts) points */ + void *(*ptr_func_alloc)(unsigned int, void*); + /* allocate memory function */ + void (*ptr_func_free)(void*, void*); + /* deallocate memory function */ + void* memory_context; + /*memory management context*/ + MATFLOAT hue_max; + MATFLOAT org1; + MATFLOAT org3; + /* internally calculated constant */ +}; + +void gm_ctor(struct s_gamut_map *ptr_gamut_map, + void*(*ptr_func_alloc)(unsigned int, void*), + void(*ptr_func_free)(void*, void*), + void* mem_context); /* constructor */ +void gm_dtor(struct s_gamut_map *ptr_gamut_map); /* destructor */ +void gm_alloc_mem(struct s_gamut_map *ptr_gamut_map); +void gm_free_mem(struct s_gamut_map *ptr_gamut_map); + +/* initialization functions */ +void gm_set_def(struct s_gamut_map *gamut_map); +int gm_init_gamuts(struct s_gamut_map *ptr_gamut_map, struct s_cs_opts *ptr_cs_opts_src, + struct s_cs_opts *ptr_cs_opts_dst, unsigned int gm_mode, int update_msk); +int gm_check_gamut(struct s_gamut_map *ptr_gamut_map); +void gm_gencusp_ic(struct s_gamut_map *ptr_gamut_map, int color_space); /* color_space : 0 - source, 1 - target */ + +/* gamut map description generation functions */ +void gm_gen_edge_hue(struct s_gamut_map* ptr_gamut_map, int hue_ind); + +/* resampling functions */ +void gm_resample_hc(MATFLOAT vec_ich_inp[][3], MATFLOAT *ptr_hc_out, + int num_int_pnts_src, int num_int_pnts_dst); +void gm_resample_hue_ic(MATFLOAT *ptr_hue, MATFLOAT *ptr_ic_inp, + MATFLOAT *ptr_ic_out, int num_hue_pnts_inp, int num_hue_pnts_out); +void gm_genprim_hc(struct s_color_space *ptr_color_space, MATFLOAT *ptr_hr_hc, + int num_int_pnts, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max); + +/* Origin2 and Origin3 generation functions */ +void gm_genorg13_factor(struct s_gamut_map* ptr_gamut_map, MATFLOAT* 
ptr_org13_factor); +void gm_genorigin23_hue(struct s_gamut_map* ptr_gamut_map, MATFLOAT* ptr_org13_factor, int hue_ind); +void gm_getorigin23(struct s_color_space* ptr_color_space_src, struct s_color_space* ptr_color_space_dst, + MATFLOAT hue, MATFLOAT org_13_factor[2], MATFLOAT org2_perc_c,MATFLOAT cusp_ic_src[2], + MATFLOAT cusp_ic_dst[2], MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int en_pq_lut); + +/* gamut map functions */ +int gm_rgb_to_rgb(struct s_gamut_map *ptr_gamut_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]); +MATFLOAT gm_tm_itp(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], MATFLOAT luma_limits[3], + MATFLOAT lum_min, MATFLOAT lum_max, int en_tm_scale_color, int en_tm_scale_luma); +MATFLOAT gm_tm_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max); +MATFLOAT gm_scale_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max); +int gm_map_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]); +int gm_map_chto_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]); +int gm_map_chso_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]); +int gm_map_chci_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]); + +/* hue rotation functions */ +void gm_hr_itp(struct s_gamut_map *gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int direction); +void gm_hr_ich(struct s_gamut_map *ptr_gamut_map, MATFLOAT ich_inp[3], MATFLOAT ich_out[3], int direction); +void gm_get_hr_parms(MATFLOAT ich[3], MATFLOAT luma_limits[3], MATFLOAT *ptr_hr_src_hc, + MATFLOAT *ptr_hr_dst_hc, int num_int_pnts, MATFLOAT rot_hs_cg[2]); + +/* segments intersection functions */ +int gm_map_seg_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], + MATFLOAT itp_out[3], int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int vec_hue_ind[2], MATFLOAT hue_phs); +int gm_map_rad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT 
itp_inp[3], + MATFLOAT itp_out[3], int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT hue); +int gm_map_segrad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], + MATFLOAT itp_out[3], int zone, MATFLOAT origin2_ic[2], + MATFLOAT origin3_ic[2], MATFLOAT hue, int vec_hue_ind[2], MATFLOAT hue_phs); + +/* interpolate Ic between two hues */ +MATFLOAT gm_hue_to_index_phase(MATFLOAT hue, MATFLOAT hue_max, int num_hue_pnts, int vec_hue_ind[2]); +void gm_interp_ic(int vec_hue_ind[2], MATFLOAT hue_phs, + MATFLOAT vec_pnt_ic[], MATFLOAT pnt_ic[2]); +void gm_getseg_ic(int vec_hue_ind[2], MATFLOAT hue_phs, + int ind, int num_edge_pnts, MATFLOAT *ptr_edge_ic, MATFLOAT pnt_ic[2]); + +/* Edge generation functions */ +void gm_genedge(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], + int num_edge_pnts, enum gm_edge_type edge_type, MATFLOAT step_samp, MATFLOAT hue, + MATFLOAT *ptr_edge_ic, int en_pq_lut); +void gm_genedge_int(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], + int num_edge_pnts, MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic, + int en_pq_lut); +void gm_genedge_rad(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], + int num_edge_pnts, MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic, + int en_pq_lut); +/* hue_sin_cos holds { sin(hue), cos(hue) } in that order, as in gm_is_valid_ic */ +void gm_sample_edge_ic(struct s_color_space *ptr_color_space, + MATFLOAT hue_sin_cos[2], MATFLOAT inc_ic[2], MATFLOAT pnt_ic[2], + int en_pq_lut); +void gm_edgecusp_adjust(MATFLOAT *ptr_edge_ic, int num_edge_pnts, MATFLOAT cusp_ic[2]); + +/* Gamut Map related functions */ +int gm_get_zone(MATFLOAT itp[3], MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT luma_limits[3]); +/* gm_map_zoneN_seg: pnt_fst / pnt_lst are the first and last edge point indices to scan; the scan direction follows their order */ +int gm_map_zone1_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], + MATFLOAT hue_phs, MATFLOAT origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst); +int gm_map_zone2_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], + MATFLOAT hue_phs, MATFLOAT 
origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst); +int gm_map_zone3_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], + MATFLOAT hue_phs, MATFLOAT origin3_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst); +/* gm_map_zoneN_rad: en_pq_lut is forwarded to gm_sample_edge_ic (non-zero selects the PQ lookup table in gm_itp_to_rgb) */ +void gm_map_zone1_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], + MATFLOAT itp_out[3], MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut); +void gm_map_zone2_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], + MATFLOAT itp_out[3], MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut); +void gm_map_zone3_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], + MATFLOAT itp_out[3], MATFLOAT step_samp, MATFLOAT origin3_ic[2], MATFLOAT hue, int en_pq_lut); + +/* Show Pixel debugging functions */ +void gm_show_pix(int zone, MATFLOAT itp_src[3], MATFLOAT itp_dst[3], + MATFLOAT rgb[3], enum gm_show_pix_mode show_pix_mode, MATFLOAT hue_limits[2]); + +void gm_rgb_to_itp(struct s_color_space* ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3], int en_pq_lut); +void gm_itp_to_rgb(struct s_color_space* ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3], int en_pq_lut); + +int gm_is_valid_itp(struct s_color_space* ptr_color_space, MATFLOAT itp[3], int en_pq_lut); +int gm_is_valid_ic(struct s_color_space* ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2], int en_pq_lut); + +void gm_gen_pq_lut(float* ptr_lut, int num_pnts, enum cs_gamma_dir gamma_dir); +MATFLOAT gm_pq_lut(MATFLOAT val, enum cs_gamma_dir gamma_dir); +int gm_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2], MATFLOAT s1_xy[2], + MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2]); + + +/* MULTI-THREADING */ +/* for a multi-threading implementation the following functions must be overwritten */ +void gm_gen_map(struct s_gamut_map* ptr_gamut_map, int update_msk); +void gm_gen_3dlut(struct s_gamut_map* ptr_gamut_map, int num_pnts, + int 
bitwidth, int en_merge, unsigned short* ptr_3dlut_rgb); +/* end MULTI-THREADING */ + +/* global constants */ +static const MATFLOAT gm_vec_org13_factor_def[GM_NUM_PRIM][2] = { + {1.3, 1.05}, /* M */ + {1.3, 1.10}, /* R */ + {1.3, 1.10}, /* Y */ + {1.3, 1.05}, /* G */ + {1.2, 1.01}, /* C */ + {1.0, 1.06} /* B */ +}; + +static const MATFLOAT gm_vec_cusp_rgb[GM_NUM_PRIM][3] = { + {1.0, 0.0, 1.0}, /* M */ + {1.0, 0.0, 0.0}, /* R */ + {1.0, 1.0, 0.0}, /* Y */ + {0.0, 1.0, 0.0}, /* G */ + {0.0, 1.0, 1.0}, /* C */ + {0.0, 0.0, 1.0} /* B */ +}; + +#ifdef __cplusplus +} +#endif diff --git a/src/amd/gmlib/gm/mat_funcs.c b/src/amd/gmlib/gm/mat_funcs.c new file mode 100755 index 00000000000..dd1d0042c89 --- /dev/null +++ b/src/amd/gmlib/gm/mat_funcs.c @@ -0,0 +1,918 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + *---------------------------------------------------------------------- + * File Name : mat_funcs.c + * Purpose : Mathematical functions + * Author : Vladimir Lachine (vlachine@amd.com) + * Date : September 20, 2023 + * Version : 1.2 + *---------------------------------------------------------------------- + */ + +#ifndef GM_SIM +#pragma code_seg("PAGED3PC") +#pragma data_seg("PAGED3PD") +#pragma const_seg("PAGED3PR") +#endif + +#include "mat_funcs.h" +#include <math.h> + +float mat_fast_log(float x); + +void mat_eval_3x3(MATFLOAT mat[3][3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3]) +{ /* vec_out = mat * vec_inp; vec_out is zeroed before vec_inp is read, so the two must not be the same array */ + int ni, nj; + + mat_3x1_zero(vec_out); + for (ni = 0; ni < 3; ni++) + for (nj = 0; nj < 3; nj++) + vec_out[ni] += mat[ni][nj] * vec_inp[nj]; +} + +void mat_eval_3x3_off(MATFLOAT mat[3][3], MATFLOAT vec_off[3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3]) +{ /* vec_out = mat * vec_inp + vec_off; same aliasing restriction as mat_eval_3x3 */ + int nc; + + mat_eval_3x3(mat, vec_inp, vec_out); + for (nc = 0; nc < 3; nc++) + vec_out[nc] += vec_off[nc]; +} + +void mat_eval_off_3x3_off(MATFLOAT vec_off_inp[3], MATFLOAT mat[3][3], + MATFLOAT vec_off_out[3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3]) +{ /* vec_out = mat * (vec_inp + vec_off_inp) + vec_off_out */ + MATFLOAT val_tmp[3]; + int nc; + + for 
(nc = 0; nc < 3; nc++) + val_tmp[nc] = vec_inp[nc] + vec_off_inp[nc]; + mat_eval_3x3(mat, val_tmp, vec_out); + for (nc = 0; nc < 3; nc++) + vec_out[nc] += vec_off_out[nc]; +} + +void mat_mul3x3(MATFLOAT mat2[3][3], MATFLOAT mat1[3][3], MATFLOAT mat2x1[3][3]) +{ /* mat2x1 = mat2 * mat1; mat2x1 is zeroed first, so it must not be the same array as either input */ + int ni, nj, nk; + + mat_3x3_zero(mat2x1); + for (ni = 0; ni < 3; ni++) + for (nj = 0; nj < 3; nj++) + for (nk = 0; nk < 3; nk++) + mat2x1[ni][nj] += mat2[ni][nk] * mat1[nk][nj]; +} + +int mat_inv3x3(MATFLOAT mat_inp[3][3], MATFLOAT mat_out[3][3]) +{ /* returns 1 and writes the inverse to mat_out, or 0 (mat_out untouched) when mat_inp is treated as singular; mat_out is written while mat_inp is still being read, so they must be different arrays */ +/* +* Calculate the determinant of matrix A and determine if +* the matrix is singular as limited by the MATFLOAT precision +* floating-point data representation. +*/ + MATFLOAT det = 0.0; + MATFLOAT pos = 0.0; + MATFLOAT neg = 0.0; + MATFLOAT temp; + + temp = mat_inp[0][0] * mat_inp[1][1] * mat_inp[2][2]; + if (temp >= 0.0) + pos += temp; + else + neg += temp; + temp = mat_inp[0][1] * mat_inp[1][2] * mat_inp[2][0]; + if (temp >= 0.0) + pos += temp; + else + neg += temp; + temp = mat_inp[0][2] * mat_inp[1][0] * mat_inp[2][1]; + if (temp >= 0.0) + pos += temp; + else + neg += temp; + temp = -mat_inp[0][2] * mat_inp[1][1] * mat_inp[2][0]; + if (temp >= 0.0) + pos += temp; + else + neg += temp; + temp = -mat_inp[0][1] * mat_inp[1][0] * mat_inp[2][2]; + if (temp >= 0.0) + pos += temp; + else + neg += temp; + temp = -mat_inp[0][0] * mat_inp[1][2] * mat_inp[2][1]; + if (temp >= 0.0) + pos += temp; + else + neg += temp; + det = pos + neg; + + /* Is the submatrix A singular? 
*/ + if ((det == 0.0) || (MAT_ABS(det / (pos - neg)) < PRECISION_LIMIT)) + return 0; /* mat_inp has no inverse */ + + /* Calculate inverse(A) = adj(A) / det(A) */ + mat_out[0][0] = (mat_inp[1][1] * mat_inp[2][2] - mat_inp[1][2] * mat_inp[2][1]) / det; + mat_out[1][0] = -(mat_inp[1][0] * mat_inp[2][2] - mat_inp[1][2] * mat_inp[2][0]) / det; + mat_out[2][0] = (mat_inp[1][0] * mat_inp[2][1] - mat_inp[1][1] * mat_inp[2][0]) / det; + mat_out[0][1] = -(mat_inp[0][1] * mat_inp[2][2] - mat_inp[0][2] * mat_inp[2][1]) / det; + mat_out[1][1] = (mat_inp[0][0] * mat_inp[2][2] - mat_inp[0][2] * mat_inp[2][0]) / det; + mat_out[2][1] = -(mat_inp[0][0] * mat_inp[2][1] - mat_inp[0][1] * mat_inp[2][0]) / det; + mat_out[0][2] = (mat_inp[0][1] * mat_inp[1][2] - mat_inp[0][2] * mat_inp[1][1]) / det; + mat_out[1][2] = -(mat_inp[0][0] * mat_inp[1][2] - mat_inp[0][2] * mat_inp[1][0]) / det; + mat_out[2][2] = (mat_inp[0][0] * mat_inp[1][1] - mat_inp[0][1] * mat_inp[1][0]) / det; + + return 1; +} + +void mat_3x1_zero(MATFLOAT vec_out[3]) +{ + int nc; + + for (nc = 0; nc < 3; nc++) + vec_out[nc] = 0.0; +} + +void mat_3x3_zero(MATFLOAT mat_out[3][3]) +{ + int ni, nj; + + for (ni = 0; ni < 3; ni++) + for (nj = 0; nj < 3; nj++) + mat_out[ni][nj] = 0.0; +} + +void mat_3x3_unity(MATFLOAT mat_out[3][3]) +{ + int ni, nj; + + for (ni = 0; ni < 3; ni++) + for (nj = 0; nj < 3; nj++) + mat_out[ni][nj] = (ni == nj) ? 
1.0f : 0.0f; +} + +void mat_copy3x3(MATFLOAT mat_inp[3][3], MATFLOAT mat_out[3][3]) +{ + int ni, nj; + + for (ni = 0; ni < 3; ni++) + for (nj = 0; nj < 3; nj++) + mat_out[ni][nj] = mat_inp[ni][nj]; +} + +int mat_round(MATFLOAT val) +{ + int sign = MAT_ZSGN(val); + int val_out = (int)(MAT_ABS(val) + 0.5); + + return sign * val_out; +} + +MATFLOAT mat_int2flt(int val, int val_max) +{ + return (MATFLOAT)val / (MATFLOAT)val_max; +} + +int mat_flt2int(MATFLOAT val_inp, int val_max) +{ + MATFLOAT val_tmp = val_inp * (MATFLOAT)val_max; + int val_out = mat_round(val_tmp); + + return MAT_CLAMP(val_out, 0, val_max); +} + +void mat_gen_mat_off(MATFLOAT mat_inp[3][3], MATFLOAT vec_off_inp[3], + MATFLOAT vec_off_out[3], MATFLOAT mat_res[3][3], MATFLOAT vec_off_res[3]) +{ + int nc; + + /* construct transform. The 'inoff' is merged into output offset. */ + if (vec_off_out) + for (nc = 0; nc < 3; nc++) + vec_off_res[nc] = vec_off_out[nc]; + else + mat_3x1_zero(vec_off_res); + + if (mat_inp) + mat_copy3x3(mat_inp, mat_res); + else + mat_3x3_unity(mat_res); + + if (vec_off_inp) + for (nc = 0; nc < 3; nc++) + vec_off_res[nc] -= (mat_res[nc][0] * vec_off_inp[0] + mat_res[nc][1] * + vec_off_inp[1] + mat_res[nc][2] * vec_off_inp[2]); +} + +void mat_scl_off(MATFLOAT vec_off_inp[3], MATFLOAT vec_off_out[3], int bitwidth) +{ /* output may be the same as input */ + int nc; + + for (nc = 0; nc < 3; nc++) + vec_off_out[nc] = vec_off_inp[nc] * (MATFLOAT)(1 << bitwidth); +} + +void mat_cvt_cs(int vec_inp[3], int vec_out[3], int bitwidth, + MATFLOAT mat[3][3], MATFLOAT vec_off[3], int is_clip) +{ + int nc, ni; + + for (nc = 0; nc < 3; nc++) { + MATFLOAT sum = vec_off[nc]; + + for (ni = 0; ni < 3; ni++) + sum += mat[nc][ni] * (MATFLOAT)vec_inp[ni]; + int nValue = mat_round(sum); + if (is_clip) { + const int cnMaxValue = (1 << bitwidth) - 1; + + MAT_CLAMP(nValue, 0, cnMaxValue); + } + vec_out[nc] = nValue; + } +} + +MATFLOAT mat_norm_angle(MATFLOAT angle) +{ + MATFLOAT pi2 = 2.0f * mat_get_pi(); + 
MATFLOAT angle_out = angle; + + if (angle_out < 0.0f) + angle_out += pi2; + else if (angle_out >= pi2) + angle_out -= pi2; + + return angle_out; +} + +MATFLOAT mat_clamp(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_max) +{ + return MAT_CLAMP(val_inp, val_min, val_max); +} + +int mat_is_valid(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_max) +{ + return ((mat_is_number(val_inp) == 0) || (val_inp < val_min) || (val_inp > val_max)) ? 0 : 1; +} + +int mat_is_valid_vec(MATFLOAT vec_inp[], int size, MATFLOAT val_min, MATFLOAT val_max) +{ + int ni; + + for (ni = 0; ni < size; ni++) + if (mat_is_valid(vec_inp[ni], val_min, val_max) == 0) + return 0; + + return 1; +} + +int mat_is_number(MATFLOAT val) +{ /* Check if this is not NaN */ + return (val == val); +} + +MATFLOAT mat_norm(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_rng) +{ /* map to [0.0,1.0] */ + return (val_inp - val_min) / val_rng; +} + +MATFLOAT mat_denorm(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_rng) +{ /* map from [0.0,1.0] */ + return val_inp * val_rng + val_min; +} + +void mat_copy(MATFLOAT vec_inp[], MATFLOAT vec_out[], int size) +{ + int nc; + + for (nc = 0; nc < size; nc++) + vec_out[nc] = vec_inp[nc]; +} + +void mat_set(MATFLOAT val_inp, MATFLOAT vec_out[], int size) +{ + int nc; + + for (nc = 0; nc < size; nc++) + vec_out[nc] = val_inp; +} + +int mat_flt_to_index(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts) +{ + MATFLOAT step = val_max / (MATFLOAT)(num_pnts - 1); + + return (int)(val_inp / step); +} + +MATFLOAT mat_index_to_flt(int index, MATFLOAT val_max, int num_pnts) +{ + MATFLOAT step = val_max / (MATFLOAT)(num_pnts - 1); + + return (MATFLOAT)index * step; +} + +MATFLOAT mat_flt_to_index_phase(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts, int vec_ind[2]) +{ + MATFLOAT step = val_max / (MATFLOAT)(num_pnts - 1); + MATFLOAT tmp = val_inp / step; + + vec_ind[0] = (int)tmp; + vec_ind[1] = vec_ind[0] + 1; + if (vec_ind[1] > num_pnts - 1) + vec_ind[1] = num_pnts - 1; + + 
return tmp - (MATFLOAT)vec_ind[0]; +} + +MATFLOAT mat_vec_to_index_phase(MATFLOAT val_inp, MATFLOAT vec_val[], int num_pnts, int vec_ind[2]) +{ + int ind0, ind1; + + /* calculate indexes */ + for (ind0 = num_pnts - 1; ind0 >= 0; ind0--) { + if (val_inp >= vec_val[ind0]) + break; + } + ind1 = MAT_MIN(ind0 + 1, num_pnts - 1); + + vec_ind[0] = ind0; + vec_ind[1] = ind1; + + return (vec_val[ind0] == vec_val[ind1]) ? 0.0 : (val_inp - vec_val[ind0]) / (vec_val[ind1] - vec_val[ind0]); +} + +int mat_int_to_index(int val_inp, int val_max, int num_indexes) +{ + return val_inp * (num_indexes - 1) / val_max; +} + +int mat_index_to_int(int index, int val_max, int num_indexes) +{ + return index * val_max / (num_indexes - 1); +} + +MATFLOAT mat_int_to_index_phase(int val_inp, int val_max, int num_indexes, int vec_val_ind[2]) +{ + MATFLOAT step = (MATFLOAT)val_max / (MATFLOAT)(num_indexes - 1); + + vec_val_ind[0] = mat_int_to_index(val_inp, val_max, num_indexes); + vec_val_ind[1] = MAT_MIN(vec_val_ind[0] + 1, num_indexes - 1); + + return (val_inp - mat_index_to_int(vec_val_ind[0], val_max, num_indexes)) / step; +} + +int mat_get_hue_index_2pi(MATFLOAT vec_hue[], int num_hue_pnts) +{ /* find a point crossing 2PI */ + int index_2pi; + + for (index_2pi = num_hue_pnts - 1; index_2pi >= 1; index_2pi--) + if (vec_hue[index_2pi] < vec_hue[index_2pi - 1]) + break; + + return index_2pi; +} + +MATFLOAT mat_hue_to_index_phase(MATFLOAT val_inp, int num_hue_pnts, + MATFLOAT vec_val[], MATFLOAT val_max, int index_max, int vec_ind_out[2]) +{ + int ind0, ind1; + MATFLOAT step, delta; + + /* calculate indexes */ + ind1 = index_max; + while (val_inp >= vec_val[ind1]) { + ind1 = (ind1 + 1) % num_hue_pnts; + if (ind1 == index_max) + break; + } + ind0 = (ind1 > 0) ? 
ind1 - 1 : num_hue_pnts - 1; + + /* calculate phase */ + step = vec_val[ind1] - vec_val[ind0]; + if (step < 0.0) + step += val_max; + delta = val_inp - vec_val[ind0]; + if (delta < 0.0) + delta += val_max; + + vec_ind_out[0] = ind0; + vec_ind_out[1] = ind1; + + return delta / step; +} + +int mat_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2], + MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2]) +{ + MATFLOAT s1_x = p1_xy[0] - p0_xy[0]; + MATFLOAT s1_y = p1_xy[1] - p0_xy[1]; + MATFLOAT s2_x = p3_xy[0] - p2_xy[0]; + MATFLOAT s2_y = p3_xy[1] - p2_xy[1]; + MATFLOAT denom = -s2_x * s1_y + s1_x * s2_y; + MATFLOAT s0_x, s0_y, s, t; + + if (denom == 0.0) + return 0; /* no collision */ + + s0_x = p0_xy[0] - p2_xy[0]; + s0_y = p0_xy[1] - p2_xy[1]; + + s = (-s1_y * s0_x + s1_x * s0_y) / denom; + if ((s < 0.0) || (s > 1.0)) + return 0; /* no collision */ + + t = (s2_x * s0_y - s2_y * s0_x) / denom; + if ((t < 0.0) || (t > 1.0)) + return 0; /* no collision */ + + /* collision detected */ + p_xy[0] = p0_xy[0] + (t * s1_x); + p_xy[1] = p0_xy[1] + (t * s1_y); + + return 1; +} + +MATFLOAT mat_linear(MATFLOAT vec_inp[2], MATFLOAT phs) +{ + return vec_inp[0] + (vec_inp[1] - vec_inp[0]) * phs; +} + +MATFLOAT mat_bilinear(MATFLOAT vec_inp[2][2], MATFLOAT vec_phs[2]) +{ + int ni; + MATFLOAT vec_tmp[2]; + + for (ni = 0; ni < 2; ni++) + vec_tmp[ni] = mat_linear(vec_inp[ni], vec_phs[0]); + + return mat_linear(vec_tmp, vec_phs[1]); +} + +MATFLOAT mat_trilinear(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3]) +{ + int ni; + MATFLOAT vec_tmp[2]; + + for (ni = 0; ni < 2; ni++) + vec_tmp[ni] = mat_bilinear(vec_inp[ni], vec_phs); + + return mat_linear(vec_tmp, vec_phs[2]); +} + +MATFLOAT mat_tetra(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3]) +{ + MATFLOAT fx = vec_phs[2]; + MATFLOAT fy = vec_phs[1]; + MATFLOAT fz = vec_phs[0]; + MATFLOAT vec_c[3]; + MATFLOAT value; + int nc; + + if (fx > fy) { + if (fy > fz) { /* T0: x > y > z */ + vec_c[0] = vec_inp[1][0][0] - vec_inp[0][0][0]; + 
vec_c[1] = vec_inp[1][1][0] - vec_inp[1][0][0]; + vec_c[2] = vec_inp[1][1][1] - vec_inp[1][1][0]; + } else if (fx > fz) { /* T5: x > z > y */ + vec_c[0] = vec_inp[1][0][0] - vec_inp[0][0][0]; + vec_c[1] = vec_inp[1][1][1] - vec_inp[1][0][1]; + vec_c[2] = vec_inp[1][0][1] - vec_inp[1][0][0]; + } else { /* T4: z > x > y */ + vec_c[0] = vec_inp[1][0][1] - vec_inp[0][0][1]; + vec_c[1] = vec_inp[1][1][1] - vec_inp[1][0][1]; + vec_c[2] = vec_inp[0][0][1] - vec_inp[0][0][0]; + } + } else { + if (fx > fz) { /* T1: y > x > z */ + vec_c[0] = vec_inp[1][1][0] - vec_inp[0][1][0]; + vec_c[1] = vec_inp[0][1][0] - vec_inp[0][0][0]; + vec_c[2] = vec_inp[1][1][1] - vec_inp[1][1][0]; + } else if (fy > fz) { /* T2: y > z > x */ + vec_c[0] = vec_inp[1][1][1] - vec_inp[0][1][1]; + vec_c[1] = vec_inp[0][1][0] - vec_inp[0][0][0]; + vec_c[2] = vec_inp[0][1][1] - vec_inp[0][1][0]; + } else { /* T3: z > y > x */ + vec_c[0] = vec_inp[1][1][1] - vec_inp[0][1][1]; + vec_c[1] = vec_inp[0][1][1] - vec_inp[0][0][1]; + vec_c[2] = vec_inp[0][0][1] - vec_inp[0][0][0]; + } + } + + value = vec_inp[0][0][0]; + for (nc = 0; nc < 3; nc++) + value += vec_c[nc] * vec_phs[2 - nc]; + + return MAT_CLAMP(value, 0.0, 1.0); +} + +MATFLOAT mat_cubic(MATFLOAT vec_inp[4], MATFLOAT phs) +{ + return vec_inp[1] + 0.5 * phs * (vec_inp[2] - vec_inp[0] + + phs * (2.0 * vec_inp[0] - 5.0 * vec_inp[1] + 4.0 * vec_inp[2] - vec_inp[3] + + phs * (3.0 * (vec_inp[1] - vec_inp[2]) + vec_inp[3] - vec_inp[0]))); +} + +MATFLOAT mat_mse(MATFLOAT val1[], MATFLOAT val2[], int size) +{ + MATFLOAT err = 0.0; + int nc; + + for (nc = 0; nc < size; nc++) { + MATFLOAT err_tmp = val1[nc] - val2[nc]; + + err += err_tmp * err_tmp; + } + + return mat_sqrt(err); +} + +MATFLOAT mat_sshape(MATFLOAT val, MATFLOAT gamma) +{ + MATFLOAT k = 0.5 * mat_pow(0.5, -gamma); + MATFLOAT val_out = (val <= 0.5) ? 
k * mat_pow(val, gamma) : 1.0 - k * mat_pow((1.0 - val), gamma); + + return val_out; +} + +MATFLOAT mat_radius_vec(MATFLOAT vec_val[], MATFLOAT vec_org[], int size) +{ + MATFLOAT radius = 0.0; + int ni; + + for (ni = 0; ni < size; ni++) + radius += (vec_val[ni] - vec_org[ni]) * (vec_val[ni] - vec_org[ni]); + + return mat_sqrt(radius); +} + +void mat_gain_vec(MATFLOAT vec_inp[], MATFLOAT vec_out[], MATFLOAT vec_org[], int size, MATFLOAT gain) +{ + int ni; + + for (ni = 0; ni < 3; ni++) + vec_out[ni] = vec_org[ni] + (vec_inp[ni] - vec_org[ni]) * gain; +} + +MATFLOAT mat_get_pi(void) +{ +#ifdef GM_MAT_MATH + return (MATFLOAT)acos(-1.0); +#else + return 3.14159265358979323; +#endif +} + +MATFLOAT mat_angle(MATFLOAT y, MATFLOAT x) +{ + return mat_norm_angle(mat_atan2(y, x)); +} + +MATFLOAT mat_radius(MATFLOAT y, MATFLOAT x) +{ + return mat_sqrt(y * y + x * x); +} + +MATFLOAT mat_pow(MATFLOAT val0, MATFLOAT val1) +{ + return (MATFLOAT)pow(val0, val1); +} + +MATFLOAT mat_atan2(MATFLOAT y, MATFLOAT x) +{ + return (MATFLOAT)atan2(y, x); +} + +MATFLOAT mat_cos(MATFLOAT val) +{ + return (MATFLOAT)cos(val); +} + +MATFLOAT mat_sin(MATFLOAT val) +{ + return (MATFLOAT)sin(val); +} + +MATFLOAT mat_log2(MATFLOAT val) +{ + return (MATFLOAT)(mat_log(val) / mat_log(2.0)); +} + +MATFLOAT mat_log10(MATFLOAT val) +{ + return (MATFLOAT)(mat_log(val) / mat_log(10.0)); +} + +MATFLOAT mat_frexp(MATFLOAT val, int *exponent) +{ + return (MATFLOAT)frexp(val, exponent); +} + +#ifndef GM_MAT_MATH +static const unsigned char root_recip_table[128] = { + 0x69, 0x66, 0x63, 0x61, 0x5E, 0x5B, 0x59, 0x57, /* for x =(2.0 ... 
3.99)*(4^n) */ + 0x54, 0x52, 0x50, 0x4D, 0x4B, 0x49, 0x47, 0x45, /* (exponent is even) */ + 0x43, 0x41, 0x3F, 0x3D, 0x3B, 0x39, 0x37, 0x36, + 0x34, 0x32, 0x30, 0x2F, 0x2D, 0x2C, 0x2A, 0x28, + 0x27, 0x25, 0x24, 0x22, 0x21, 0x1F, 0x1E, 0x1D, + 0x1B, 0x1A, 0x19, 0x17, 0x16, 0x15, 0x14, 0x12, + 0x11, 0x10, 0x0F, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, + 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, + 0xFE, 0xFA, 0xF6, 0xF3, 0xEF, 0xEB, 0xE8, 0xE4, /* for x =(1.0 ... 1.99)*(4^n) */ + 0xE1, 0xDE, 0xDB, 0xD7, 0xD4, 0xD1, 0xCE, 0xCB, /* (exponent is odd) */ + 0xC9, 0xC6, 0xC3, 0xC0, 0xBE, 0xBB, 0xB8, 0xB6, + 0xB3, 0xB1, 0xAF, 0xAC, 0xAA, 0xA8, 0xA5, 0xA3, + 0xA1, 0x9F, 0x9D, 0x9B, 0x99, 0x97, 0x95, 0x93, + 0x91, 0x8F, 0x8D, 0x8B, 0x89, 0x87, 0x86, 0x84, + 0x82, 0x80, 0x7F, 0x7D, 0x7B, 0x7A, 0x78, 0x77, + 0x75, 0x74, 0x72, 0x71, 0x6F, 0x6E, 0x6C, 0x6B +}; + +/* + * find a reciprocal of square-root of x, using a similar method. + * an approximation is found, using the 6 MSBs of the mantissa, + * and the LSB of the exponent. + * The exponent mapping is a bit tricker than in the RECIPS case: + * we want + * 125,126 -> 127 + * 127,128 -> 126 + * 129,130 -> 125 + * 131,132 -> 124 + * + * So, we can take original exponent, add 131, then >>1, then + * take the 1's complement. + * The result is accurate +/- 1 lsb in float precision. I'm not + * sure exactly what the full range of this is, it should + * work for any values >0, except for denormals. + * + * iterative method: + * Cavanagh, J. 1984. Digital Computer Arithmetic. McGraw-Hill. Page 278. 
+ */ +float mat_fast_rsqrt(float val) +{ + union { + float fval; + unsigned int uval; + } u; + unsigned int new_mant; + float rsqa, rprod; + + u.fval = val; + u.uval &= 0x7FFFFFFF; /* can't have sign */ + val = u.fval * 0.5f; + + new_mant = root_recip_table[(u.uval >> 17) & 0x7F]; + /* + * create modified exponent ; drop in new mantissa + */ + u.uval = (~((u.uval + 0x41800000) >> 1) & 0x7F800000) + (new_mant << 15); + rsqa = u.fval; + /* + * note: we could do + * rsqa *= 1.5f - rsqa*rsqa * x + * but there are cases where x is very small + * (zero or denormal) and rsqa*rsqa could overflow. We generate + * the wrong answer in these cases, but at least it isn't a NaN. + */ + rprod = val * rsqa; + rsqa *= 1.5f - rprod * rsqa; + rprod = val * rsqa; + rsqa *= 1.5f - rprod * rsqa; + rprod = val * rsqa; + rsqa *= 1.5f - rprod * rsqa; + + return rsqa; +} + +#define Declare_Special_Float(cnst) { union { unsigned int ui; float f; } u; u.ui = (cnst); return u.f; } +float FLT_INF(void); +float FLT_MINF(void); +float FLT_NAN(void); +float FLT_INF(void) Declare_Special_Float(0x7F800000); +float FLT_MINF(void) Declare_Special_Float(0xFF800000); +float FLT_NAN(void) Declare_Special_Float(0x7F800001); +/* + * table below is + * a = log(x+1), b = exp(-a); + * comment shows range of x to which each line applies. + */ +static const float log_tab[64] = { + 0.000000000f, 1.000000000f, /* 0 to 0.0111657 */ + 0.022311565f, 0.977935498f, /* ... to 0.0340233 */ + 0.044580154f, 0.956398938f, /* ... to 0.0572837 */ + 0.066807851f, 0.935374915f, /* ... to 0.0810282 */ + 0.089004092f, 0.914841830f, /* ... to 0.1052765 */ + 0.111178130f, 0.894779348f, /* ... to 0.1300487 */ + 0.133338988f, 0.875168370f, /* ... to 0.1553661 */ + 0.155495435f, 0.855990985f, /* ... to 0.1812505 */ + 0.177655950f, 0.837230423f, /* ... to 0.2077248 */ + 0.199828684f, 0.818871027f, /* ... to 0.2348125 */ + 0.222021341f, 0.800898272f, /* ... to 0.2625375 */ + 0.244241118f, 0.783298744f, /* ... 
to 0.2909245 */ + 0.266494602f, 0.766060139f, /* ... to 0.3199984 */ + 0.288787603f, 0.749171310f, /* ... to 0.3497841 */ + 0.311125100f, 0.732622219f, /* ... to 0.3803064 */ + 0.333510906f, 0.716404086f, /* ... to 0.4115894 */ + 0.355947524f, 0.700509379f, /* ... to 0.4436560 */ + 0.378435910f, 0.684931867f, /* ... to 0.4765275 */ + 0.400975198f, 0.669666670f, /* ... to 0.5102230 */ + 0.423562229f, 0.654710433f, /* ... to 0.5447579 */ + 0.446191430f, 0.640061233f, /* ... to 0.5801435 */ + 0.468854219f, 0.625718795f, /* ... to 0.6163859 */ + 0.491538733f, 0.611684450f, /* ... to 0.6534842 */ + 0.514229417f, 0.597961196f, /* ... to 0.6914296 */ + 0.536906660f, 0.584553682f, /* ... to 0.7302038 */ + 0.559546530f, 0.571468149f, /* ... to 0.7697776 */ + 0.582120657f, 0.558712272f, /* ... to 0.8101096 */ + 0.604596078f, 0.546295042f, /* ... to 0.8511456 */ + 0.626935601f, 0.534226378f, /* ... to 0.8928175 */ + 0.649098098f, 0.522516823f, /* ... to 0.9350435 */ + 0.671039402f, 0.511176983f, /* ... to 0.9777287 */ + 0.693147182f, 0.500000000f, /* ....to 0.9999999 */ +}; + +/* + * FAST LN function + * + * (1) split the number into its base-2 exponent 'e', and + * a mantissa 'xm' in range 1.0 .. 1.99999 + * + * (2) using a cubic, find y0 = approx. ln(xm) + * (3) scale this, round it to a table index 0...31. + * From the table, get a log value, (which will be added to the result) + * and a scale factor. + * Multiply xm by the scale factor, result xe is very close to 1. + * + * (4) find ye = log(xe) using a taylor series around xe=1 + * (5) result is is yt+ye+log(2)*exp, where yt is from the table (1st col) + * and exp is the original exponent. + * Note that multiplying the input by the second column of the the table, + * and adding the 1st column of the table to the result, has no net effect. 
+ */ +float mat_fast_log(float x) +{ + union { + float f; + unsigned int ui; + } u; + float xm1, xe, ye; + int tabind; + int ex; + + u.f = x; + ex = ((u.ui >> 23) & 0x1FF) - 127; + if ((ex <= -127) || (ex >= 128)) { + if ((ex & 0xFF) == 1) + return FLT_MINF(); /* was 0.0 or -0.0 (or denormal) */ + return FLT_NAN(); + } + u.ui -= ex << 23; + /* + * now u.f is in range 1.0 ... 1.99999 + */ + xm1 = u.f - 1.0f; /* 0. 1.0 */ + /* + * The table above and the cubic below were generated together + */ + tabind = MAT_ROUND(((xm1 * 0.1328047513f - 0.4396575689f) * xm1 * xm1 + xm1) * 44.75f); + /* + * tabind is in range 0..31. + * multiply u.f by the second value in the table, subtract 1 + */ + xe = u.f * log_tab[2 * tabind + 1] - 1.0f; /* result is +/- .0114 */ + + /* + * find the log(xe+1) using taylor series; add to (a) amount from exponent + * (b) amount from table + */ + ye = ((-0.25f * xe + 0.333333333f) * xe - 0.5f) * xe * xe; + ye += xe; + return 0.693147182f * (float)ex + log_tab[2 * tabind] + ye; +} + +static const float exp_table[16] = +{ + /* (1/6) * 2^(i/16.), to float precision */ + 0.166666672f, 0.174045637f, 0.181751296f, 0.189798102f, + 0.198201180f, 0.206976309f, 0.216139928f, 0.225709260f, + 0.235702261f, 0.246137694f, 0.257035136f, 0.268415064f, + 0.280298799f, 0.292708695f, 0.305668026f, 0.319201082f +}; + +/* + * FAST_EXP does an exponential function. + * This is done using a table lookup to + * get close and a taylor series to + * get accurate. + * + * if y = exp(x) = (2^m)*(P^n)*exp(f), where P = 2^(1/16), + * + * then x = ln(2^m) + ln(P^n) + f + * = ln(P^(16*m+n)) + f + * = ln(P) * [ 16*m +n ] +f + * let k = ln(P) = ln(2)/16 = 0.043321698785 + * + * so x = k*[16*m + n] + f + * + * For a given x, we find m,n,f such that: + * m is an integer + * n is in integer 0..15 + * f is as close to zero as possible: +/- k/2 + * + * Then we find y = (2^m)*(P^n)*exp(f) + * + * where 2^m is an exponent adjustment, P^n is a table lookup + * and exp(f) is calculated. 
The 4th term in the series + * for exp(f) is at most k^4/(16*24) = 9.17e-9, so we only + * need to do up to the 3rd order. + * + * One more quirk: + * exp(f) is evaluated via + * 6*exp(f) = ((f + 3)*f + 6)*f + 6 + * + * To compensate, the numbers in the P^n table are really 1/6 as + * big as they should be. + * + * Example: exp(13.2) + * 13.2 * (1/k) = 304.697, round to 305 => m*16+n = 305 + * f = 13.2 - k * 305 = -0.013118 + * m = 19, n = 1 + * + * 6*exp(f) = ((f + 3)*f + 6)*f + 6 = 5.921805 + * exp_table[n] * (6*exp(f)) = .174046 * 5.921805 = 1.030664 + * multiply that by 2^m (=5.24288e5) -> 5.40365e5 + * + */ +float mat_fast_exp(float x) +{ + int m, n; + union { + unsigned ui; + float f; + } u; + + n = MAT_ROUND(x * 23.08312065f); /* 16/log(2) */ + /* + * range check on n now + */ + if ((n <= -2016) || (n >= 2048)) { + if (n < 0) + return 0.0f; + else + return FLT_INF(); + } + x -= (float)n * 0.043321698785f; /* log(2)/16. */ + + m = (n >> 4); + x = ((x + 3.0f) * x + 6.0f) * x + 6.0f; + u.f = x * exp_table[n & 15]; + u.ui += (m << 23); /* exponent adjust */ + + return u.f; +} +#endif + +MATFLOAT mat_sqrt(MATFLOAT val) +{ +#ifndef GM_MAT_MATH + return 1.0 / (MATFLOAT)mat_fast_rsqrt((float)val); +#else + return (MATFLOAT)sqrt(val); +#endif +} + +MATFLOAT mat_log(MATFLOAT val) +{ /* base e */ +#ifdef GM_MAT_MATH + return (MATFLOAT)log(val); +#else + return (MATFLOAT)mat_fast_log((float)val); +#endif +} + +MATFLOAT mat_exp(MATFLOAT val) +{ +#ifdef GM_MAT_MATH + return (MATFLOAT)exp(val); +#else + return (MATFLOAT)mat_fast_exp((float)val); +#endif +} + +unsigned int mat_index_3dlut(int ind_r, int ind_g, int ind_b, int num_pnts, enum mat_order_3dlut order) +{ + unsigned int index; + + switch (order) { + case MAT_ORDER_RGB: + index = (ind_b * num_pnts + ind_g) * num_pnts + ind_r; + break; + case MAT_ORDER_BGR: + default: + index = (ind_r * num_pnts + ind_g) * num_pnts + ind_b; + break; + } + + return index; +} diff --git a/src/amd/gmlib/gm/mat_funcs.h 
/*
 * Helper macros.  Unless noted otherwise they are plain expressions that
 * evaluate their arguments more than once, so do not pass arguments with
 * side effects (i++, function calls that modify state, ...).
 */

#define MATFLOAT double

/* precision for matrix inversion */
#define PRECISION_LIMIT (1.0e-15)

/* absolute value of a */
#define MAT_ABS(a) (((a) < 0) ? -(a) : (a))

/* find minimum of a and b */
#define MAT_MIN(a, b) (((a) < (b)) ? (a) : (b))

/* find maximum of a and b */
#define MAT_MAX(a, b) (((a) > (b)) ? (a) : (b))

/* clip v to the range [l, h]; yields a value, it does not modify v */
#define MAT_CLAMP(v, l, h) ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))

/*
 * add 0.5 and truncate to int; this rounds to the nearest int for
 * non-negative a only (the conversion truncates toward zero)
 */
#define MAT_ROUND(a) ((int)((a) + 0.5f))

/* take sign of a, either -1, 0, or 1 */
#define MAT_ZSGN(a) (((a) < 0) ? -1 : (a) > 0 ? 1 : 0)

/* take binary sign of a, either -1, or 1 if >= 0 */
#define MAT_SGN(a) (((a) < 0) ? -1 : 1)

/*
 * swap a and b (see Gem by Wyvill)
 * XOR swap: a and b must be modifiable integer lvalues and must not name
 * the same object (that would zero it).  Use as a statement: MAT_SWAP(a, b);
 */
#define MAT_SWAP(a, b) do { (a) ^= (b); (b) ^= (a); (a) ^= (b); } while (0)

/* linear interpolation from l (when a=0) to h (when a=1) */
/* (equal to (a*h)+((1-a)*l) */
#define MAT_LERP(a, l, h) ((l) + (((h) - (l)) * (a)))
val_inp, MATFLOAT vec_out[], int size); + +int mat_flt_to_index(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts); +MATFLOAT mat_index_to_flt(int index, MATFLOAT val_max, int num_pnts); +MATFLOAT mat_flt_to_index_phase(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts, int vec_ind[2]); +MATFLOAT mat_vec_to_index_phase(MATFLOAT val_inp, MATFLOAT vec_val[], int num_pnts, int vec_ind[2]); + +int mat_int_to_index(int val_inp, int val_max, int num_indexes); +int mat_index_to_int(int index, int val_max, int num_indexes); +MATFLOAT mat_int_to_index_phase(int val_inp, int val_max, int num_indexes, int vec_val_ind[2]); +int mat_get_hue_index_2pi(MATFLOAT vec_hue[], int num_hue_pnts); +MATFLOAT mat_hue_to_index_phase(MATFLOAT val_inp, int num_hue_pnts, + MATFLOAT vec_val[], MATFLOAT val_max, int index_max, int vec_ind_out[2]); + +int mat_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2], + MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2]); + +MATFLOAT mat_linear(MATFLOAT vec_inp[2], MATFLOAT phs); +MATFLOAT mat_bilinear(MATFLOAT vec_inp[2][2], MATFLOAT vec_phs[2]); +MATFLOAT mat_trilinear(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3]); +MATFLOAT mat_tetra(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3]); +MATFLOAT mat_cubic(MATFLOAT vec_inp[4], MATFLOAT phs); + +MATFLOAT mat_mse(MATFLOAT val1[], MATFLOAT val2[], int size); +MATFLOAT mat_sshape(MATFLOAT val, MATFLOAT gamma); +MATFLOAT mat_get_pi(void); + +MATFLOAT mat_angle(MATFLOAT y, MATFLOAT x); +MATFLOAT mat_radius(MATFLOAT y, MATFLOAT x); +MATFLOAT mat_radius_vec(MATFLOAT val[], MATFLOAT org[], int size); +void mat_gain_vec(MATFLOAT vec_inp[], MATFLOAT vec_out[], MATFLOAT vec_org[], int size, MATFLOAT gain); + +MATFLOAT mat_pow(MATFLOAT val0, MATFLOAT val1); +MATFLOAT mat_atan2(MATFLOAT y, MATFLOAT x); +MATFLOAT mat_cos(MATFLOAT val); +MATFLOAT mat_sin(MATFLOAT val); +MATFLOAT mat_sqrt(MATFLOAT val); +MATFLOAT mat_log(MATFLOAT val); +MATFLOAT mat_log2(MATFLOAT val); +MATFLOAT mat_log10(MATFLOAT val); +MATFLOAT 
mat_frexp(MATFLOAT val, int *exponent); + +#ifndef GM_MAT_MATH +float mat_fast_rsqrt(float val); +float mat_fast_exp(float x); +#endif + +MATFLOAT mat_exp(MATFLOAT val); + +enum mat_order_3dlut { + MAT_ORDER_RGB = 0, + MAT_ORDER_BGR = 1 +}; + +unsigned int mat_index_3dlut(int ind_r, int ind_g, int ind_b, int num_pnts, enum mat_order_3dlut order); + +#ifdef __cplusplus +} +#endif diff --git a/src/amd/gmlib/meson.build b/src/amd/gmlib/meson.build new file mode 100755 index 00000000000..4098b5f8786 --- /dev/null +++ b/src/amd/gmlib/meson.build @@ -0,0 +1,65 @@ +# Copyright 2022 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +c_args_gm = cc.get_supported_arguments([ + '-Wall', + '-Wextra', + '-Wno-unused', + '-Wno-unused-parameter', + '-Wno-unused-command-line-argument', + '-Wno-ignored-qualifiers', + '-Wno-missing-field-initializers', + '-Wno-self-assign', + '-Wno-implicit-fallthrough', + '-Werror=comment', + '-Werror=missing-braces', + '-Werror=override-init', + '-Werror=enum-conversion', + '-Werror=enum-compare', + '-Werror=maybe-uninitialized', +]) + +c_args_gm += [ + '-DGM_SIM', +] + +gm_files = files( + 'tonemap_adaptor.h', + 'tonemap_adaptor.c', + 'gm/csc_api_funcs.c', + 'gm/csc_api_funcs.h', + 'gm/csc_funcs.c', + 'gm/csc_funcs.h', + 'gm/cs_funcs.c', + 'gm/cs_funcs.h', + 'gm/cvd_api_funcs.c', + 'gm/cvd_api_funcs.h', + 'gm/cvd_funcs.c', + 'gm/cvd_funcs.h', + 'gm/gm_api_funcs.c', + 'gm/gm_api_funcs.h', + 'gm/gm_funcs.c', + 'gm/gm_funcs.h', + 'gm/mat_funcs.c', + 'gm/mat_funcs.h', + 'ToneMapGenerator/inc/ToneMapGenerator.h', + 'ToneMapGenerator/inc/ToneMapTypes.h', + 'ToneMapGenerator/src/inc/AGMGenerator.h', + 'ToneMapGenerator/src/inc/CSCGenerator.h', + 'ToneMapGenerator/src/src/AGMGenerator.c', + 'ToneMapGenerator/src/src/ToneMapGenerator.c', +) + +inc_amd_gm = include_directories( + 'gm', + 'ToneMapGenerator/inc', + 'ToneMapGenerator/src/inc', +) + +libgm = static_library( + 'libgm.a', + gm_files, + install : false, + c_args : c_args_gm, + 
include_directories : inc_amd_gm +) diff --git a/src/amd/gmlib/tonemap_adaptor.c b/src/amd/gmlib/tonemap_adaptor.c new file mode 100755 index 00000000000..825c0363b94 --- /dev/null +++ b/src/amd/gmlib/tonemap_adaptor.c @@ -0,0 +1,78 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#include +#include +#include +#include +#include "ToneMapGenerator.h" +#include "AGMGenerator.h" +#include "tonemap_adaptor.h" + +static void VPEFree3DLut(void* memToFree, void* pDevice) +{ + free(memToFree); +} + +static void* VPEAlloc3DLut(unsigned int allocSize, void* pDevice) +{ + return calloc(1, allocSize); +} + +void* tm_create(void) +{ + struct ToneMapGenerator* p_tmGenerator = (struct ToneMapGenerator*)calloc(1, sizeof(struct ToneMapGenerator)); + if (!p_tmGenerator) + return NULL; + + p_tmGenerator->tmAlgo = TMG_A_AGM; + p_tmGenerator->memAllocSet = false; + p_tmGenerator->agmGenerator.initalized = false; + + return (void*)p_tmGenerator; +} + +void tm_destroy(void** pp_tmGenerator) +{ + struct ToneMapGenerator* p_tmGenerator; + + if (!pp_tmGenerator || ((*pp_tmGenerator) == NULL)) + return; + + p_tmGenerator = *pp_tmGenerator; + AGMGenerator_Exit(&p_tmGenerator->agmGenerator); + + free(p_tmGenerator); + *pp_tmGenerator = NULL; +} + +int tm_generate3DLut(struct tonemap_param* pInparam, void* pformattedLutData) +{ + enum TMGReturnCode result; + struct ToneMappingParameters tmParams; + + tmParams.lutData = (uint16_t *)pformattedLutData; + + ToneMapGenerator_SetInternalAllocators( + (struct ToneMapGenerator*)pInparam->tm_handle, + (TMGAlloc)(VPEAlloc3DLut), + (TMGFree)(VPEFree3DLut), + (void*)(NULL)); + + result = ToneMapGenerator_GenerateToneMappingParameters( + (struct ToneMapGenerator*)pInparam->tm_handle, + &pInparam->streamMetaData, + &pInparam->dstMetaData, + pInparam->inputContainerGamma, + pInparam->outputContainerGamma, + pInparam->outputContainerPrimaries, + pInparam->lutDim, + &tmParams + ); + + return 
(int)result; +} diff --git a/src/amd/gmlib/tonemap_adaptor.h b/src/amd/gmlib/tonemap_adaptor.h new file mode 100755 index 00000000000..41777a24977 --- /dev/null +++ b/src/amd/gmlib/tonemap_adaptor.h @@ -0,0 +1,33 @@ +/* Copyright 2025 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + * + * Authors: AMD + * + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "ToneMapGenerator/inc/ToneMapTypes.h" + +struct tonemap_param +{ + void* tm_handle; + struct ToneMapHdrMetaData streamMetaData; + struct ToneMapHdrMetaData dstMetaData; + enum ToneMapTransferFunction inputContainerGamma; + enum ToneMapTransferFunction outputContainerGamma; + enum ToneMapColorPrimaries outputContainerPrimaries; + unsigned short lutDim; +}; + +void* tm_create(void); +void tm_destroy(void** pp_tmGenerator); +int tm_generate3DLut(struct tonemap_param* pInparam, void* pformattedLutData); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/src/amd/meson.build b/src/amd/meson.build index b0d9cb3c5bf..146f2bed97d 100644 --- a/src/amd/meson.build +++ b/src/amd/meson.build @@ -27,4 +27,5 @@ endif if with_gallium_radeonsi subdir('vpelib') + subdir('gmlib') endif